Python API#
The NeMo Evaluator Launcher provides a Python API for programmatic access to evaluation functionality. This allows you to integrate evaluations into your Python workflows, Jupyter notebooks, and automated pipelines.
Installation#
pip install nemo-evaluator-launcher
# With optional exporters
pip install nemo-evaluator-launcher[mlflow,wandb,gsheets]
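To verify the installation, the package should import cleanly:
# A quick smoke test: the import fails if the install is broken
import nemo_evaluator_launcher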
Core Functions#
Running Evaluations#
from nemo_evaluator_launcher.api import RunConfig, run_eval
# Run evaluation with configuration
config = RunConfig.from_hydra(
    config="examples/local_basic.yaml",
    hydra_overrides=[
        "execution.output_dir=my_results"
    ],
)

# run_eval returns an invocation ID for tracking
invocation_id = run_eval(config)
print(f"Started evaluation: {invocation_id}")
Listing Available Tasks#
from nemo_evaluator_launcher.api import get_tasks_list
# Get all available evaluation tasks
tasks = get_tasks_list()
# Each task contains: [task_name, endpoint_type, harness, container]
for task in tasks[:5]:
    task_name, endpoint_type, harness, container = task
    print(f"Task: {task_name}, Type: {endpoint_type}")
Checking Job Status#
from nemo_evaluator_launcher.api import get_status
# Check status of a specific invocation or job
status = get_status(["abc12345"])
# Returns list of status dictionaries with keys: invocation, job_id, status, progress, data
for job_status in status:
    print(f"Job {job_status['job_id']}: {job_status['status']}")
Configuration Management#
Creating Configuration with Hydra#
from nemo_evaluator_launcher.api import RunConfig
from omegaconf import OmegaConf
# Load default configuration
config = RunConfig.from_hydra()
print(OmegaConf.to_yaml(config))
Loading Existing Configuration#
from nemo_evaluator_launcher.api import RunConfig
# Load a specific configuration file
config = RunConfig.from_hydra(
    config="examples/local_basic.yaml"
)
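The OmegaConf.to_yaml call above suggests the returned object behaves like a regular OmegaConf config. Assuming that holds, individual fields are reachable by attribute access before you run anything:
from nemo_evaluator_launcher.api import RunConfig

config = RunConfig.from_hydra(config="examples/local_basic.yaml")

# Dotted keys from the override examples map to nested attributes
print(config.execution.output_dir)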
Configuration with Overrides#
import tempfile
from nemo_evaluator_launcher.api import RunConfig, run_eval
# Create configuration with both Hydra overrides and dictionary overrides
config = RunConfig.from_hydra(
    hydra_overrides=[
        "execution.output_dir=" + tempfile.mkdtemp()
    ],
    dict_overrides={
        "target": {
            "api_endpoint": {
                "url": "https://integrate.api.nvidia.com/v1/chat/completions",
                "model_id": "meta/llama-3.2-3b-instruct",
                "api_key_name": "NGC_API_KEY"
            }
        },
        "evaluation": [
            {
                "name": "ifeval",
                "overrides": {
                    "config.params.limit_samples": 10
                }
            }
        ]
    },
)
# Run evaluation
invocation_id = run_eval(config)
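The two override mechanisms complement each other: hydra_overrides takes standard Hydra key=value strings applied while the configuration is composed, while dict_overrides merges a nested dictionary onto the result, which is more convenient for structured values such as the evaluation list above.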
Exploring Deployment Options#
from nemo_evaluator_launcher.api import RunConfig
from omegaconf import OmegaConf
# Load a configuration with a different deployment backend
config = RunConfig.from_hydra(
    hydra_overrides=["deployment=vllm"]
)
print(OmegaConf.to_yaml(config))
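In Hydra terms, deployment=vllm selects the vllm option from the launcher's deployment config group, so the printed YAML includes the vLLM deployment settings; substitute another group option to compare backends.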
Jupyter Notebook Integration#
# Cell 1: Setup
import tempfile
from omegaconf import OmegaConf
from nemo_evaluator_launcher.api import RunConfig, get_status, get_tasks_list, run_eval
# Cell 2: List available tasks
tasks = get_tasks_list()
print("Available tasks:")
for task in tasks[:10]:  # Show first 10
    print(f" - {task[0]} ({task[1]})")
# Cell 3: Create and run evaluation
config = RunConfig.from_hydra(
    hydra_overrides=[
        "execution.output_dir=" + tempfile.mkdtemp()
    ],
    dict_overrides={
        "target": {
            "api_endpoint": {
                "url": "https://integrate.api.nvidia.com/v1/chat/completions",
                "model_id": "meta/llama-3.2-3b-instruct",
                "api_key_name": "NGC_API_KEY"
            }
        },
        "evaluation": [
            {
                "name": "ifeval",
                "overrides": {
                    "config.params.limit_samples": 10
                }
            }
        ]
    },
)
invocation_id = run_eval(config)
print(f"Started evaluation: {invocation_id}")
# Cell 4: Check status
status_list = get_status([invocation_id])
status = status_list[0]
print(f"Status: {status['status']}")
print(f"Output directory: {status['data']['output_dir']}")
Watching Checkpoints#
The watcher module provides continuous checkpoint discovery and evaluation submission. See Continuous Checkpoint Evaluation for a full walkthrough.
Running the Watcher#
from pathlib import Path
from nemo_evaluator_launcher.watcher.configs import WatchConfig
from nemo_evaluator_launcher.watcher.run import watch_and_evaluate
# Load the watch config (supports Hydra config groups and overrides)
watch_config = WatchConfig.from_hydra(
    path=Path("my-watch-config.yaml"),
    overrides=["monitoring_config.interval=60"],
)

# Run until all directories are exhausted or Ctrl+C
submissions = watch_and_evaluate(
    watch_config=watch_config,
    resubmit_previous_sessions=False,
    dry_run=False,
)
for s in submissions:
    print(f"{s.checkpoint} -> {s.invocation_id}")
Discover Checkpoints Without Submitting#
from pathlib import Path
from nemo_evaluator_launcher.watcher.configs import ClusterConfig
from nemo_evaluator_launcher.watcher.run import discover_checkpoints
cluster_config = ClusterConfig(
    username="myuser",
    hostname="my-cluster-login.example.com",
    account="my-account",
    output_dir="/shared/results",
)

checkpoints = discover_checkpoints(
    watch_dir=Path("/checkpoints/my-training-run"),
    cluster_config=cluster_config,
    ready_markers=["metadata.json", "config.yaml"],
    checkpoint_patterns=["step_*", "iter_*"],
)
print(f"Found {len(checkpoints)} ready checkpoints")
See Also#
CLI Reference - Command-line interface documentation
Configuration - Configuration system overview
Exporters - Result export options