# Create Customization Config

## Prerequisites
Before you can create a customization config, make sure that you have:

- Set the `CUSTOMIZER_BASE_URL` environment variable to your NeMo Customizer service endpoint:

  ```bash
  export CUSTOMIZER_BASE_URL="https://your-customizer-service-url"
  ```
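Optionally, you can confirm that the endpoint is reachable by listing existing configs. This is a sketch that assumes the service accepts `GET` on the same path the examples below `POST` to:

```bash
# List customization configs to confirm the endpoint responds
# (GET support on this path is an assumption).
curl "${CUSTOMIZER_BASE_URL}/customization/configs" \
  -H 'accept: application/json' | jq
```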
## To Create a Customization Config
You can create a customization configuration with the Python SDK or by calling the REST API directly; both examples below create the same config.

Both examples define a `pod_spec` that allows jobs to be scheduled only on nodes tainted with `app=a100-workload` by specifying the required toleration. For more information about GPU cluster configurations, see Configure Cluster GPUs.
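Note that a toleration only permits scheduling on tainted nodes; the matching taint must already exist on the cluster. For reference, an administrator would apply it with a standard `kubectl` command like the following (the node name is a placeholder):

```bash
# Taint an A100 node so that only pods tolerating app=a100-workload
# can be scheduled on it.
kubectl taint nodes <a100-node-name> app=a100-workload:NoSchedule
```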
Using the Python SDK:

```python
import os

from nemo_microservices import NeMoMicroservices

# Initialize the client
client = NeMoMicroservices(
    base_url=os.environ['CUSTOMIZER_BASE_URL']
)

# Create a customization config
config = client.customization.configs.create(
    name="llama-3.1-8b-instruct@v1.0.0+A100",
    namespace="default",
    description="Configuration for Llama 3.1 8B on A100 GPUs",
    target="meta/llama-3.1-8b-instruct@2.0",
    training_options=[
        {
            "training_type": "sft",
            "finetuning_type": "lora",
            "num_gpus": 2,
            "micro_batch_size": 8,
            "tensor_parallel_size": 1,
            "pipeline_parallel_size": 1,
            "use_sequence_parallel": False
        }
    ],
    training_precision="bf16",
    max_seq_length=2048,
    prompt_template="{input} {output}",
    pod_spec={
        "tolerations": [
            {
                "key": "app",
                "operator": "Equal",
                "value": "a100-workload",
                "effect": "NoSchedule"
            }
        ],
        "node_selectors": {
            "nvidia.com/gpu.product": "NVIDIA-A100-80GB"
        },
        "annotations": {
            "sidecar.istio.io/inject": "false"
        }
    }
)

print(f"Created config: {config.name}")
print(f"Config ID: {config.id}")
```
Using the REST API directly:

```bash
curl -X POST \
  "${CUSTOMIZER_BASE_URL}/customization/configs" \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
    "name": "llama-3.1-8b-instruct@v1.0.0+A100",
    "namespace": "default",
    "description": "Configuration for Llama 3.1 8B on A100 GPUs",
    "target": "meta/llama-3.1-8b-instruct@2.0",
    "training_options": [
      {
        "training_type": "sft",
        "finetuning_type": "lora",
        "num_gpus": 2,
        "micro_batch_size": 8,
        "tensor_parallel_size": 1,
        "pipeline_parallel_size": 1,
        "use_sequence_parallel": false
      }
    ],
    "training_precision": "bf16",
    "max_seq_length": 2048,
    "prompt_template": "{input} {output}",
    "pod_spec": {
      "tolerations": [
        {
          "key": "app",
          "operator": "Equal",
          "value": "a100-workload",
          "effect": "NoSchedule"
        }
      ],
      "node_selectors": {
        "nvidia.com/gpu.product": "NVIDIA-A100-80GB"
      },
      "annotations": {
        "sidecar.istio.io/inject": "false"
      }
    }
  }' | jq
```
Example Response

```json
{
  "id": "customization_config-MedVscVbr4pgLhLgKTLbv9",
  "name": "llama-3.1-8b-instruct@v1.0.0+A100",
  "namespace": "default",
  "description": "Configuration for Llama 3.1 8B on A100 GPUs",
  "target": {
    "id": "customization_target-AbCdEfGhIjKlMnOpQrStUv",
    "name": "meta/llama-3.1-8b-instruct@2.0",
    "namespace": "default",
    "base_model": "meta/llama-3.1-8b-instruct",
    "enabled": true,
    "num_parameters": 8000000000,
    "precision": "bf16",
    "status": "ready"
  },
  "training_options": [
    {
      "training_type": "sft",
      "finetuning_type": "lora",
      "num_gpus": 2,
      "num_nodes": 1,
      "micro_batch_size": 8,
      "tensor_parallel_size": 1,
      "pipeline_parallel_size": 1,
      "use_sequence_parallel": false
    }
  ],
  "training_precision": "bf16",
  "max_seq_length": 2048,
  "pod_spec": {
    "tolerations": [
      {
        "key": "app",
        "operator": "Equal",
        "value": "a100-workload",
        "effect": "NoSchedule"
      }
    ],
    "node_selectors": {
      "nvidia.com/gpu.product": "NVIDIA-A100-80GB"
    },
    "annotations": {
      "sidecar.istio.io/inject": "false"
    }
  },
  "prompt_template": "{input} {output}",
  "chat_prompt_template": null,
  "dataset_schemas": [],
  "custom_fields": {},
  "ownership": {},
  "created_at": "2024-01-15T10:30:00.000Z",
  "updated_at": "2024-01-15T10:30:00.000Z"
}
```
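After the config is created, you reference it as `<namespace>/<name>` when starting a customization job. The following is a minimal sketch, not a definitive request: the `/customization/jobs` path, the placeholder dataset, and the hyperparameter values are assumptions here; check the jobs API reference for the exact schema.

```bash
# Start a customization job against the config created above.
# Sketch only: the path and fields are assumptions, and
# "default/sample-dataset" is a hypothetical dataset name.
curl -X POST \
  "${CUSTOMIZER_BASE_URL}/customization/jobs" \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
    "config": "default/llama-3.1-8b-instruct@v1.0.0+A100",
    "dataset": {"name": "sample-dataset", "namespace": "default"},
    "hyperparameters": {
      "training_type": "sft",
      "finetuning_type": "lora",
      "epochs": 2,
      "batch_size": 8,
      "learning_rate": 0.0001
    }
  }' | jq
```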