Create an Evaluation Configuration
To create a configuration for an evaluation, send a POST request to the /evaluation/configs API. The URL of the Evaluator API depends on where you deploy the Evaluator service and how you configure it. For more information, refer to Job Target and Configuration Matrix.
Prerequisites
Set the EVALUATOR_BASE_URL environment variable to your Evaluator service endpoint:

export EVALUATOR_BASE_URL="https://your-evaluator-service-endpoint"

Review the available evaluation configuration types.
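Optionally, confirm that the variable is visible to your Python session before you run the examples below. This quick check is not part of the Evaluator API; it only guards against a missing prerequisite:

import os

# Fail fast if the prerequisite environment variable is missing
if not os.environ.get("EVALUATOR_BASE_URL"):
    raise RuntimeError("EVALUATOR_BASE_URL is not set; export it before continuing")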
Using the Python SDK:

import os

from nemo_microservices import NeMoMicroservices

# Initialize the client
client = NeMoMicroservices(
    base_url=os.environ['EVALUATOR_BASE_URL']
)

# Create an evaluation config
config = client.evaluation.configs.create(
    type="gsm8k",
    name="my-configuration-lm-harness-gsm8k-1",
    namespace="my-organization",
    params={
        "temperature": 0.00001,
        "top_p": 0.00001,
        "max_tokens": 256,
        "stop": ["<|eot|>"],
        "extra": {
            "num_fewshot": 8,
            "batch_size": 16,
            "bootstrap_iters": 100000,
            "dataset_seed": 42,
            "use_greedy": True,
            "top_k": 1,
            "hf_token": "<my-token>",
            "tokenizer_backend": "hf",
            "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
            "apply_chat_template": True,
            "fewshot_as_multiturn": True
        }
    }
)
print("Evaluation config created successfully")
curl -X "POST" "${EVALUATOR_BASE_URL}/evaluation/configs" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"type": "gsm8k",
"name": "my-configuration-lm-harness-gsm8k-1",
"namespace": "my-organization",
"params": {
"temperature": 0.00001,
"top_p": 0.00001,
"max_tokens": 256,
"stop": ["<|eot|>"],
"extra": {
"num_fewshot": 8,
"batch_size": 16,
"bootstrap_iters": 100000,
"dataset_seed": 42,
"use_greedy": true,
"top_k": 1,
"hf_token": "<my-token>",
"tokenizer_backend": "hf",
"tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
"apply_chat_template": true,
"fewshot_as_multiturn": true
}
}
}'
Options
API
Perform a POST request to the /v1/evaluation/configs endpoint.

curl:

curl -X "POST" "${EVALUATOR_BASE_URL}/v1/evaluation/configs" \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
    "type": "<evaluation-type>",
    "name": "<my-configuration-name>",
    "namespace": "<my-namespace>",
    // More config details
  }'
data = { "type": "<evaluation-type>", "name": "<my-configuration-name>", "namespace": "<my-namespace>", # More config details } endpoint = f"{EVALUATOR_SERVICE_URL}/v1/evaluation/configs" response = requests.post(endpoint, json=data).json()
Review the returned configuration.
Example Response
{ "created_at": "2025-03-19T22:50:02.206136", "updated_at": "2025-03-19T22:50:02.206138", "id": "eval-config-MNOP1234QRST5678", "name": "my-configuration-lm-harness-gsm8k-1", "namespace": "my-organization", "type": "gsm8k", "params": { "temperature": 0.00001, "top_p": 0.00001, "max_tokens": 256, "stop": ["<|eot|>"], "extra": { "num_fewshot": 8, "batch_size": 16, "bootstrap_iters": 100000, "dataset_seed": 42, "use_greedy": true, "top_k": 1, "hf_token": "<my-token>", "tokenizer_backend": "hf", "tokenizer": "meta-llama/Llama-3.1-8B-Instruct", "apply_chat_template": true, "fewshot_as_multiturn": true } }, "custom_fields": {} }
SDK
Synchronous client:

import os

from nemo_microservices import NeMoMicroservices

# Initialize the client
client = NeMoMicroservices(
    base_url=os.environ['EVALUATOR_BASE_URL']
)

# Create an evaluation config
config = client.evaluation.configs.create(
    type="gsm8k",
    name="my-configuration-lm-harness-gsm8k-1",
    namespace="my-organization",
    params={
        "temperature": 0.00001,
        "top_p": 0.00001,
        "max_tokens": 256,
        "stop": ["<|eot|>"],
        "extra": {
            "num_fewshot": 8,
            "batch_size": 16,
            "bootstrap_iters": 100000,
            "dataset_seed": 42,
            "use_greedy": True,
            "top_k": 1,
            "hf_token": "<my-token>",
            "tokenizer_backend": "hf",
            "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
            "apply_chat_template": True,
            "fewshot_as_multiturn": True
        }
    }
)
print("Evaluation config created successfully")
Asynchronous client:

import asyncio
import os

from nemo_microservices import AsyncNeMoMicroservices

async def create_evaluation_config():
    # Initialize the async client
    client = AsyncNeMoMicroservices(
        base_url=os.environ['EVALUATOR_BASE_URL']
    )

    # Create an evaluation config
    config = await client.evaluation.configs.create(
        type="gsm8k",
        name="my-configuration-lm-harness-gsm8k-1",
        namespace="my-organization",
        params={
            "temperature": 0.00001,
            "top_p": 0.00001,
            "max_tokens": 256,
            "stop": ["<|eot|>"],
            "extra": {
                "num_fewshot": 8,
                "batch_size": 16,
                "bootstrap_iters": 100000,
                "dataset_seed": 42,
                "use_greedy": True,
                "top_k": 1,
                "hf_token": "<my-token>",
                "tokenizer_backend": "hf",
                "tokenizer": "meta-llama/Llama-3.1-8B-Instruct",
                "apply_chat_template": True,
                "fewshot_as_multiturn": True
            }
        }
    )
    print("Evaluation config created successfully")
    return config

# Run the async function
asyncio.run(create_evaluation_config())
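asyncio.run starts a new event loop, so it raises a RuntimeError if one is already running. In an environment that already has an event loop, such as a Jupyter notebook cell, await the coroutine directly instead:

# Inside an already-running event loop, await the coroutine rather than calling asyncio.run
config = await create_evaluation_config()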