# List Models
## Prerequisites
Before you can list existing models, make sure that you have:
- Access to the NeMo Entity Store microservice.
## To List Models
Choose one of the following options for listing models.
Set up a `NeMoMicroservices` client instance using the base URL of the NeMo Entity Store microservice and perform the task as follows.
```python
import os

from nemo_microservices import NeMoMicroservices

client = NeMoMicroservices(
    base_url=os.environ["ENTITY_STORE_BASE_URL"]
)
response = client.models.list()
print(response)
```
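If you want to work with the individual entries rather than print the whole response, you can iterate over the result. This is a minimal sketch that assumes the SDK response mirrors the REST payload shown in the example response below, exposing the listed models under a `data` attribute; check the SDK reference for the exact response type.

```python
# Sketch: continues from the snippet above. Assumes the response object
# exposes the listed models under `data`, as in the REST payload below.
for model in response.data:
    print(f"{model.namespace}/{model.name}")
```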
Make a GET request to the `/v1/models` endpoint.
```bash
export ENTITY_STORE_BASE_URL=<URL for NeMo Entity Store>

curl -X GET "${ENTITY_STORE_BASE_URL}/v1/models" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' | jq
```
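To narrow the output to a few fields, you can extend the same call with a jq filter. The fields referenced here (`name`, `namespace`, and `artifact.status`) are taken from the example response below; entries without an `artifact` object print `null` for the status.

```bash
curl -X GET "${ENTITY_STORE_BASE_URL}/v1/models" \
  -H 'Accept: application/json' \
  | jq '.data[] | {name, namespace, status: .artifact.status}'
```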
Example Response:

```json
{
  "object": "list",
  "data": [
    {
      "created_at": "2025-01-07T16:08:49.525322",
      "updated_at": "2025-01-07T16:08:49.525324",
      "name": "meta_llama-3_1-8b-instruct-e2e-test.nightly-ndsv2-2025-01-07@1736266127.9570587",
      "namespace": "default",
      "description": "None",
      "spec": {
        "num_parameters": 8000000000,
        "context_size": 4096,
        "num_virtual_tokens": 0,
        "is_chat": false
      },
      "artifact": {
        "gpu_arch": "Ampere",
        "precision": "bf16",
        "tensor_parallelism": 1,
        "backend_engine": "nemo",
        "status": "created",
        "files_url": "hf://default/meta_llama-3_1-8b-instruct-e2e-test.nightly-ndsv2-2025-01-07@1736266127.9570587"
      },
      "base_model": "meta/llama-3_1-8b-instruct",
      "peft": {
        "finetuning_type": "lora"
      },
      "schema_version": "1.0",
      "project": "customizer",
      "custom_fields": {}
    },
    {
      "created_at": "2025-01-07T16:09:50.309769",
      "updated_at": "2025-01-07T16:09:50.309770",
      "name": "meta_llama-3_1-8b-instruct-e2e-test.nightly-ndsv2-2025-01-07@1736266189.8198516",
      "namespace": "default",
      "description": "None",
      "spec": {
        "num_parameters": 8000000000,
        "context_size": 4096,
        "num_virtual_tokens": 0,
        "is_chat": false
      },
      "artifact": {
        "gpu_arch": "Ampere",
        "precision": "bf16",
        "tensor_parallelism": 1,
        "backend_engine": "nemo",
        "status": "upload_completed",
        "files_url": "hf://default/meta_llama-3_1-8b-instruct-e2e-test.nightly-ndsv2-2025-01-07@1736266189.8198516"
      },
      "base_model": "meta/llama-3_1-8b-instruct",
      "peft": {
        "finetuning_type": "lora"
      },
      "schema_version": "1.0",
      "project": "customizer",
      "custom_fields": {}
    },
    {
      "created_at": "2025-01-07T16:51:36.891808",
      "updated_at": "2025-01-07T16:51:36.891810",
      "name": "test-example-model@agv1",
      "namespace": "default",
      "description": "None",
      "spec": {
        "num_parameters": 8000000000,
        "context_size": 4096,
        "num_virtual_tokens": 0,
        "is_chat": false
      },
      "artifact": {
        "gpu_arch": "Ampere",
        "precision": "bf16",
        "tensor_parallelism": 1,
        "backend_engine": "nemo",
        "status": "created",
        "files_url": "hf://default/test-example-model@agv1"
      },
      "base_model": "meta/llama-3.1-8b-instruct",
      "peft": {
        "finetuning_type": "lora"
      },
      "schema_version": "1.0",
      "project": "customizer",
      "custom_fields": {}
    },
    {
      "created_at": "2025-01-07T16:52:07.544858",
      "updated_at": "2025-01-07T16:52:07.544860",
      "name": "test-example-model@agv2",
      "namespace": "default",
      "description": "None",
      "spec": {
        "num_parameters": 8000000000,
        "context_size": 4096,
        "num_virtual_tokens": 0,
        "is_chat": false
      },
      "artifact": {
        "gpu_arch": "Ampere",
        "precision": "bf16",
        "tensor_parallelism": 1,
        "backend_engine": "nemo",
        "status": "created",
        "files_url": "hf://default/test-example-model@agv2"
      },
      "base_model": "meta/llama-3.1-8b-instruct",
      "peft": {
        "finetuning_type": "lora"
      },
      "schema_version": "1.0",
      "project": "customizer",
      "custom_fields": {}
    },
    {
      "created_at": "2025-01-07T22:40:49.220257",
      "updated_at": "2025-01-07T22:40:49.220258",
      "name": "meta-llama-3_1-8b-instruct-dataset-3-lora@cust-KhiGQ5WQGBDCe3QyDczW5K",
      "namespace": "default",
      "description": "Medical",
      "spec": {
        "num_parameters": 8000000000,
        "context_size": 4096,
        "num_virtual_tokens": 0,
        "is_chat": false
      },
      "artifact": {
        "gpu_arch": "Ampere",
        "precision": "bf16",
        "tensor_parallelism": 1,
        "backend_engine": "nemo",
        "status": "upload_completed",
        "files_url": "hf://default/meta-llama-3_1-8b-instruct-dataset-3-lora@cust-KhiGQ5WQGBDCe3QyDczW5K"
      },
      "base_model": "meta/llama-3_1-8b-instruct",
      "peft": {
        "finetuning_type": "lora"
      },
      "schema_version": "1.0",
      "project": "customizer",
      "custom_fields": {}
    },
    {
      "created_at": "2025-01-08T04:07:38.330000",
      "updated_at": "2025-01-08T04:07:38.330002",
      "name": "model-C6S3JuwuTj8VXcVjUemTJY",
      "namespace": "default",
      "description": "A model created by E2E test suite.",
      "base_model": "meta/llama-3.1-70b-instruct",
      "prompt": {
        "inference_params": {
          "temperature": 1.0,
          "max_tokens": 128
        }
      },
      "schema_version": "1.0",
      "project": "project-TbUgg5qAme3hBCLBVeRFdG",
      "custom_fields": {}
    },
    {
      "created_at": "2025-01-08T11:18:44.703602",
      "updated_at": "2025-01-08T11:18:44.703604",
      "name": "test-cust@v2",
      "namespace": "user1",
      "description": "None",
      "spec": {
        "num_parameters": 8000000000,
        "context_size": 4096,
        "num_virtual_tokens": 0,
        "is_chat": false
      },
      "artifact": {
        "gpu_arch": "Ampere",
        "precision": "bf16",
        "tensor_parallelism": 1,
        "backend_engine": "nemo",
        "status": "upload_completed",
        "files_url": "hf://user1/test-cust@v2"
      },
      "base_model": "gpt8b-4k",
      "peft": {
        "finetuning_type": "lora"
      },
      "schema_version": "1.0",
      "project": "customizer",
      "custom_fields": {}
    },
    {
      "created_at": "2025-01-08T11:27:07.989030",
      "updated_at": "2025-01-08T11:27:07.989031",
      "name": "test-cust@v3",
      "namespace": "user1",
      "description": "None",
      "spec": {
        "num_parameters": 8000000000,
        "context_size": 4096,
        "num_virtual_tokens": 0,
        "is_chat": false
      },
      "artifact": {
        "gpu_arch": "Ampere",
        "precision": "bf16",
        "tensor_parallelism": 1,
        "backend_engine": "nemo",
        "status": "upload_completed",
        "files_url": "hf://user1/test-cust@v3"
      },
      "base_model": "gpt8b-4k",
      "peft": {
        "finetuning_type": "lora"
      },
      "schema_version": "1.0",
      "project": "customizer",
      "custom_fields": {}
    }
  ],
  "pagination": {
    "page": 1,
    "page_size": 10,
    "current_page_size": 10,
    "total_pages": 73,
    "total_results": 726
  },
  "sort": "created_at"
}
```
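If you save the response to a file, a short script can summarize it. The sketch below relies only on fields present in the example response above; the file name `models.json` is a placeholder for wherever you redirected the output.

```python
import json

# Load a saved copy of the list response, for example the output of the
# curl command above redirected to a file (the file name is a placeholder).
with open("models.json") as f:
    listing = json.load(f)

# Print each model with its artifact upload status; entries without an
# "artifact" object (such as the prompt-based model above) show "n/a".
for model in listing["data"]:
    status = (model.get("artifact") or {}).get("status", "n/a")
    print(f"{model['namespace']}/{model['name']}: {status}")

# The pagination block tells you how many more pages there are to fetch.
pagination = listing["pagination"]
print(f"Page {pagination['page']} of {pagination['total_pages']}, "
      f"{pagination['total_results']} models in total")
```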