Python API#

The NeMo Evaluator Launcher provides a Python API for running and tracking evaluations programmatically, so you can integrate them into Python scripts, Jupyter notebooks, and automated pipelines.

Installation#

pip install nemo-evaluator-launcher

# With optional exporters (quote the extras so the shell does not expand the brackets)
pip install "nemo-evaluator-launcher[mlflow,wandb,gsheets]"
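
A quick way to confirm the install is to import the package. This is a minimal sketch; the __version__ attribute is an assumption and may not exist, so the snippet falls back to a plain message.

import nemo_evaluator_launcher

# __version__ is an assumption; fall back to a plain confirmation if absent
print(getattr(nemo_evaluator_launcher, "__version__", "installed"))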

Core Functions#

Running Evaluations#

from nemo_evaluator_launcher.api import RunConfig, run_eval

# Run evaluation with configuration
config = RunConfig.from_hydra(
    config_name="local_llama_3_1_8b_instruct",
    config_dir="examples",
    hydra_overrides=[
        "execution.output_dir=my_results"
    ]
)
# run_eval returns an invocation ID you can use to track the run
invocation_id = run_eval(config)
print(f"Started evaluation: {invocation_id}")

Listing Available Tasks#

from nemo_evaluator_launcher.api import get_tasks_list

# Get all available evaluation tasks
tasks = get_tasks_list()

# Each task contains: [task_name, endpoint_type, harness, container]
for task in tasks[:5]:
    task_name, endpoint_type, harness, container = task
    print(f"Task: {task_name}, Type: {endpoint_type}")

Checking Job Status#

from nemo_evaluator_launcher.api import get_status

# Check status by invocation ID or job ID ("abc12345" is a placeholder)
status = get_status(["abc12345"])

# Returns list of status dictionaries with keys: invocation, job_id, status, progress, data
for job_status in status:
    print(f"Job {job_status['job_id']}: {job_status['status']}")

Configuration Management#

Creating a Configuration with Hydra#

from nemo_evaluator_launcher.api import RunConfig
from omegaconf import OmegaConf

# Load default configuration
config = RunConfig.from_hydra()
print(OmegaConf.to_yaml(config))
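
Since OmegaConf.to_yaml(config) works above, the returned config behaves like an OmegaConf container, so individual fields can plausibly be adjusted in place before running. Treat the attribute access below as an assumption based on that behavior; the execution.output_dir path matches the overrides used elsewhere on this page.

from nemo_evaluator_launcher.api import RunConfig

config = RunConfig.from_hydra()

# Attribute access is an assumption based on the OmegaConf-style config above
config.execution.output_dir = "my_results"
print(config.execution.output_dir)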

Loading an Existing Configuration#

from nemo_evaluator_launcher.api import RunConfig

# Load a specific configuration file
config = RunConfig.from_hydra(
    config_name="local_llama_3_1_8b_instruct",
    config_dir="examples"
)

Configuration with Overrides#

import tempfile
from nemo_evaluator_launcher.api import RunConfig, run_eval

# Create configuration with both Hydra overrides and dictionary overrides
config = RunConfig.from_hydra(
    hydra_overrides=[
        "execution.output_dir=" + tempfile.mkdtemp()
    ],
    dict_overrides={
        "target": {
            "api_endpoint": {
                "url": "https://integrate.api.nvidia.com/v1/chat/completions",
                "model_id": "meta/llama-3.1-8b-instruct",
                "api_key_name": "NGC_API_KEY"
            }
        },
        "evaluation": [
            {
                "name": "ifeval",
                "overrides": {
                    "config.params.limit_samples": 10
                }
            }
        ]
    }
)

# Run evaluation
invocation_id = run_eval(config)
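
For reproducibility, you may want to persist the fully resolved configuration alongside your results. Assuming config is an OmegaConf object, as the OmegaConf.to_yaml calls on this page suggest, OmegaConf.save writes it out as YAML.

from omegaconf import OmegaConf

# Save the resolved configuration so the run can be reproduced later
OmegaConf.save(config, "run_config.yaml")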

Exploring Deployment Options#

from nemo_evaluator_launcher.api import RunConfig
from omegaconf import OmegaConf

# Load configuration with different deployment backend
config = RunConfig.from_hydra(
    hydra_overrides=["deployment=vllm"]
)
print(OmegaConf.to_yaml(config))
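
To compare backends without scrolling through the full output, you can dump just the deployment subtree. The vllm backend name comes from the example above; any other backend names you try are assumptions about the configs shipped with your version.

from nemo_evaluator_launcher.api import RunConfig
from omegaconf import OmegaConf

# Print only the deployment section for a quicker comparison
config = RunConfig.from_hydra(hydra_overrides=["deployment=vllm"])
print(OmegaConf.to_yaml(config.deployment))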

Jupyter Notebook Integration#

# Cell 1: Setup
import tempfile
from omegaconf import OmegaConf
from nemo_evaluator_launcher.api import RunConfig, get_status, get_tasks_list, run_eval

# Cell 2: List available tasks
tasks = get_tasks_list()
print("Available tasks:")
for task in tasks[:10]:  # Show first 10
    print(f"  - {task[0]} ({task[1]})")

# Cell 3: Create and run evaluation
config = RunConfig.from_hydra(
    hydra_overrides=[
        "execution.output_dir=" + tempfile.mkdtemp()
    ],
    dict_overrides={
        "target": {
            "api_endpoint": {
                "url": "https://integrate.api.nvidia.com/v1/chat/completions",
                "model_id": "meta/llama-3.1-8b-instruct",
                "api_key_name": "NGC_API_KEY"
            }
        },
        "evaluation": [
            {
                "name": "ifeval",
                "overrides": {
                    "config.params.limit_samples": 10
                }
            }
        ]
    }
)
invocation_id = run_eval(config)
print(f"Started evaluation: {invocation_id}")

# Cell 4: Check status
status_list = get_status([invocation_id])
status = status_list[0]
print(f"Status: {status['status']}")
print(f"Output directory: {status['data']['output_dir']}")

See Also#