Source code for nemo_automodel.cli.app
#!/usr/bin/env python3
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unified CLI entry-point for NeMo AutoModel.
Usage
-----
::
# Recommended — the CLI handles torchrun internally:
automodel <config.yaml> [--nproc-per-node N] [--key.subkey=override ...]
# Also supported — external torchrun launch:
torchrun --nproc-per-node N -m nemo_automodel.cli.app <config.yaml> [--key.subkey=override ...]
# Convenience wrapper for development (not installed):
python app.py <config.yaml> [--nproc-per-node N] [--key.subkey=override ...]
The YAML config must specify which recipe class to instantiate. All three
forms are accepted::
recipe: TrainFinetuneRecipeForNextTokenPrediction # bare class name
recipe: nemo_automodel.recipes.llm.train_ft.TrainFin... # fully-qualified
recipe:
_target_: nemo_automodel.recipes.llm.train_ft.Trai... # Hydra-style
For SLURM clusters, use ``sbatch slurm.sub`` directly (see the reference
script at the repo root). Add a ``skypilot:`` or ``nemo_run:`` section
in the YAML for those launchers.
When launched via ``torchrun``, the CLI detects the existing distributed
environment and runs the recipe in-process on each worker instead of
re-spawning torchrun.
"""
import argparse
import logging
import sys
from pathlib import Path
from nemo_automodel.cli.utils import load_yaml, resolve_recipe_name
logging.getLogger().setLevel(logging.INFO)
logger = logging.getLogger(__name__)
[docs]
def build_parser() -> argparse.ArgumentParser:
"""Build the CLI argument parser.
Returns:
argparse.ArgumentParser
"""
parser = argparse.ArgumentParser(
prog="automodel",
description=("CLI for NeMo AutoModel recipes. The YAML config specifies both the recipe and the launcher."),
)
parser.add_argument(
"config",
metavar="<config.yaml>",
type=Path,
help="Path to YAML configuration file (must specify a recipe target)",
)
parser.add_argument(
"--nproc-per-node",
"--nproc_per_node",
type=int,
default=None,
help=(
"Number of workers per node for local/interactive jobs. "
"Ignored when a skypilot/nemo_run section is present."
),
)
return parser
[docs]
def main():
"""CLI for running recipes with NeMo-AutoModel.
Supports interactive (local), SkyPilot, and NeMo-Run launchers.
For SLURM, use ``sbatch slurm.sub`` directly.
Returns:
int: Job's exit code.
"""
args, extra = build_parser().parse_known_args()
config_path = args.config.resolve()
logger.info("Config: %s", config_path)
config = load_yaml(config_path)
recipe_section = config.get("recipe")
if isinstance(recipe_section, str) and recipe_section.strip():
raw_target = recipe_section.strip()
elif isinstance(recipe_section, dict) and "_target_" in recipe_section:
raw_target = recipe_section["_target_"]
else:
logger.error(
"YAML config must specify a recipe target.\n"
"Examples:\n"
" recipe: TrainFinetuneRecipeForNextTokenPrediction\n"
" recipe: nemo_automodel.recipes.llm.train_ft."
"TrainFinetuneRecipeForNextTokenPrediction\n"
" recipe:\n"
" _target_: nemo_automodel.recipes.llm.train_ft."
"TrainFinetuneRecipeForNextTokenPrediction\n\n"
"See BREAKING_CHANGES.md for the full list of available recipe targets."
)
sys.exit(1)
try:
recipe_target = resolve_recipe_name(raw_target)
except ValueError as exc:
logger.error("%s", exc)
sys.exit(1)
logger.info("Recipe: %s", recipe_target)
if skypilot_config := config.pop("skypilot", None):
logger.info("Launching job via SkyPilot")
from nemo_automodel.components.launcher.skypilot.launcher import SkyPilotLauncher
return SkyPilotLauncher().launch(config, config_path, recipe_target, skypilot_config, extra)
elif nemo_run_config := config.pop("nemo_run", None):
logger.info("Launching job via NeMo-Run")
from nemo_automodel.components.config._arg_parser import parse_args_and_load_config
from nemo_automodel.components.launcher.nemo_run.launcher import NemoRunLauncher
cfg = parse_args_and_load_config(str(config_path))
return NemoRunLauncher().launch(cfg, config_path, recipe_target, nemo_run_config, extra)
else:
logger.info("Launching job interactively (local)")
from nemo_automodel.components.config._arg_parser import parse_args_and_load_config
from nemo_automodel.components.launcher.interactive import InteractiveLauncher
cfg = parse_args_and_load_config(str(config_path))
return InteractiveLauncher().launch(cfg, config_path, recipe_target, args.nproc_per_node, extra)
if __name__ == "__main__":
main()