Create an Evaluation Configuration#

To create a configuration for an evaluation, send a POST request to the /v1/evaluation/configs endpoint of the Evaluator API. The URL of the Evaluator API depends on where you deploy the evaluator microservice and how you configure it. For more information, refer to Job Target and Configuration Matrix.

Prerequisites#

  • Set your EVALUATOR_BASE_URL environment variable to your evaluator service endpoint (a quick connectivity check is sketched after this list):

    export EVALUATOR_BASE_URL="https://your-evaluator-service-endpoint"
    
  • Review the available evaluation configuration types.
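
Before you create a configuration, you can optionally confirm that EVALUATOR_BASE_URL points at a reachable Evaluator service. The following is a minimal sketch, assuming the API exposes a GET /v1/evaluation/configs list endpoint and returns its items in a "data" field; adjust the path and response handling to match your deployment.

import os
import requests

# Connectivity check (sketch): list existing evaluation configurations.
# The list endpoint path and the "data" field in the response are assumptions;
# add authentication headers if your deployment requires them.
base_url = os.environ['EVALUATOR_BASE_URL']
response = requests.get(f"{base_url}/v1/evaluation/configs", timeout=30)
response.raise_for_status()
print(f"Evaluator reachable; {len(response.json().get('data', []))} configurations found")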

The following example creates a gsm8k configuration with the NeMo Microservices Python SDK:

import os
from nemo_microservices import NeMoMicroservices

# Initialize the client
client = NeMoMicroservices(
    base_url=os.environ['EVALUATOR_BASE_URL']
)

# Create an evaluation config
config = client.evaluation.configs.create(
    type="gsm8k",
    name="my-configuration-lm-harness-gsm8k-1",
    namespace="my-organization",
    params={
        "temperature": 0.00001,
        "top_p": 0.00001,
        "max_tokens": 256,
        "stop": ["<|eot|>"],
        "extra": {
            "num_fewshot": 8,
            "batch_size": 16,
            "bootstrap_iters": 100000,
            "dataset_seed": 42,
            "use_greedy": True,
            "top_k": 1,
            "hf_token": "<my-token>",
            "tokenizer_backend": "hf",
            "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
            "apply_chat_template": True,
            "fewshot_as_multiturn": True
        }
    }
)

print("Evaluation config created successfully")
curl -X "POST" "${EVALUATOR_BASE_URL}/evaluation/configs" \
    -H 'accept: application/json' \
    -H 'Content-Type: application/json' \
    -d '{
        "type": "gsm8k",
        "name": "my-configuration-lm-harness-gsm8k-1",
        "namespace": "my-organization",
        "params": {
            "temperature": 0.00001,
            "top_p": 0.00001,
            "max_tokens": 256,
            "stop": ["<|eot|>"],
            "extra": {
                "num_fewshot": 8,
                "batch_size": 16,
                "bootstrap_iters": 100000,
                "dataset_seed": 42,
                "use_greedy": true,
                "top_k": 1,
                "hf_token": "<my-token>",
                "tokenizer_backend": "hf",
                "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
                "apply_chat_template": true,
                "fewshot_as_multiturn": true
            }
        }
    }'

Options#

API#

  1. Perform a POST request to the /v1/evaluation/configs endpoint.

    curl -X "POST" "${EVALUATOR_SERVICE_URL}/v1/evaluation/configs" \
        -H 'accept: application/json' \
        -H 'Content-Type: application/json' \
        -d '
        {
            "type": "<evaluation-type>",
            "name": "<my-configuration-name>",
            "namespace": "<my-namespace>",
            // More config details
        }'
    
    Or send the same request with the Python requests library:

    import os
    import requests

    data = {
        "type": "<evaluation-type>",
        "name": "<my-configuration-name>",
        "namespace": "<my-namespace>",
        # More config details
    }

    endpoint = f"{os.environ['EVALUATOR_BASE_URL']}/v1/evaluation/configs"

    response = requests.post(endpoint, json=data).json()
    
  2. Review the returned configuration. A sketch for retrieving the stored configuration later appears after the example response.

    Example Response
    {
        "created_at": "2025-03-19T22:50:02.206136",
        "updated_at": "2025-03-19T22:50:02.206138",
        "id": "eval-config-MNOP1234QRST5678",
        "name": "my-configuration-lm-harness-gsm8k-1",
        "namespace": "my-organization",
        "type": "gsm8k",
        "params": {
            "temperature": 0.00001,      
            "top_p": 0.00001,
            "max_tokens": 256,
            "stop": ["<|eot|>"],
            "extra": {
                "num_fewshot": 8,
                "batch_size": 16,
                "bootstrap_iters": 100000,
                "dataset_seed": 42,
                "use_greedy": true,
                "top_k": 1,
                "hf_token": "<my-token>",
                "tokenizer_backend": "hf",
                "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
                "apply_chat_template": true,
                "fewshot_as_multiturn": true
            }
        },
        "custom_fields": {}
    }
    
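    You typically reference this configuration by its id, or by "namespace/name", when you create an evaluation job. As a quick check, you can also fetch the stored configuration back. The following is a minimal sketch, assuming a retrieval endpoint of the form GET /v1/evaluation/configs/{namespace}/{name}; verify the exact path against the Evaluator API reference for your deployment.

    import os
    import requests

    # Sketch: fetch the configuration created above.
    # The retrieval path is an assumption; adjust it to your deployment.
    base_url = os.environ['EVALUATOR_BASE_URL']
    namespace = "my-organization"
    name = "my-configuration-lm-harness-gsm8k-1"

    response = requests.get(
        f"{base_url}/v1/evaluation/configs/{namespace}/{name}", timeout=30
    )
    response.raise_for_status()
    config = response.json()
    print(config["id"], config["type"])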

SDK#

The following example uses the synchronous client.

import os
from nemo_microservices import NeMoMicroservices

# Initialize the client
client = NeMoMicroservices(
    base_url=os.environ['EVALUATOR_BASE_URL']
)

# Create an evaluation config
config = client.evaluation.configs.create(
    type="gsm8k",
    name="my-configuration-lm-harness-gsm8k-1",
    namespace="my-organization",
    params={
        "temperature": 0.00001,
        "top_p": 0.00001,
        "max_tokens": 256,
        "stop": ["<|eot|>"],
        "extra": {
            "num_fewshot": 8,
            "batch_size": 16,
            "bootstrap_iters": 100000,
            "dataset_seed": 42,
            "use_greedy": True,
            "top_k": 1,
            "hf_token": "<my-token>",
            "tokenizer_backend": "hf",
            "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
            "apply_chat_template": True,
            "fewshot_as_multiturn": True
        }
    }
)

print("Evaluation config created successfully")

To use the asynchronous client instead:

import asyncio
import os
from nemo_microservices import AsyncNeMoMicroservices

async def create_evaluation_config():
    # Initialize the async client
    client = AsyncNeMoMicroservices(
        base_url=os.environ['EVALUATOR_BASE_URL']
    )
    
    # Create an evaluation config
    config = await client.evaluation.configs.create(
        type="gsm8k",
        name="my-configuration-lm-harness-gsm8k-1",
        namespace="my-organization",
        params={
            "temperature": 0.00001,
            "top_p": 0.00001,
            "max_tokens": 256,
            "stop": ["<|eot|>"],
            "extra": {
                "num_fewshot": 8,
                "batch_size": 16,
                "bootstrap_iters": 100000,
                "dataset_seed": 42,
                "use_greedy": True,
                "top_k": 1,
                "hf_token": "<my-token>",
                "tokenizer_backend": "hf",
                "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
                "apply_chat_template": True,
                "fewshot_as_multiturn": True
            }
        }
    )
    
    print("Evaluation config created successfully")
    return config

# Run the async function
asyncio.run(create_evaluation_config())
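
Whichever client you use, the create call returns the stored configuration. The following is a short sketch of inspecting the object returned by the synchronous example above; the attribute names mirror the fields in the example response, and attribute-style access on the SDK response object is an assumption.

# Sketch: inspect the configuration returned by configs.create() above.
# Attribute names follow the example response (id, namespace, name, type).
print(f"Config id: {config.id}")
print(f"Reference: {config.namespace}/{config.name}")
print(f"Type:      {config.type}")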