List Models
Prerequisites
Before you can list existing models, make sure that you have:
- Access to the NeMo Entity Store Microservice.
How to List Models
API
You can send requests to the NeMo Entity Store Microservice to list models.
Make a GET request to the /v1/models endpoint.
export ENTITY_STORE_BASE_URL=<URL for NeMo Entity Store>
curl -X GET "${ENTITY_STORE_BASE_URL}/v1/models" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' | jq
Check the list of available models by reviewing the response.
Example Response (abridged: "data" shows 8 of the 10 entries that "pagination.current_page_size" reports)
{ "object": "list", "data": [ { "created_at": "2025-01-07T16:08:49.525322", "updated_at": "2025-01-07T16:08:49.525324", "name": "meta_llama-3_1-8b-instruct-e2e-test.nightly-ndsv2-2025-01-07@1736266127.9570587", "namespace": "default", "description": "None", "spec": { "num_parameters": 8000000000, "context_size": 4096, "num_virtual_tokens": 0, "is_chat": false }, "artifact": { "gpu_arch": "Ampere", "precision": "bf16", "tensor_parallelism": 1, "backend_engine": "nemo", "status": "created", "files_url": "hf://default/meta_llama-3_1-8b-instruct-e2e-test.nightly-ndsv2-2025-01-07@1736266127.9570587" }, "base_model": "meta/llama-3_1-8b-instruct", "peft": { "finetuning_type": "lora" }, "schema_version": "1.0", "project": "customizer", "custom_fields": {} }, { "created_at": "2025-01-07T16:09:50.309769", "updated_at": "2025-01-07T16:09:50.309770", "name": "meta_llama-3_1-8b-instruct-e2e-test.nightly-ndsv2-2025-01-07@1736266189.8198516", "namespace": "default", "description": "None", "spec": { "num_parameters": 8000000000, "context_size": 4096, "num_virtual_tokens": 0, "is_chat": false }, "artifact": { "gpu_arch": "Ampere", "precision": "bf16", "tensor_parallelism": 1, "backend_engine": "nemo", "status": "upload_completed", "files_url": "hf://default/meta_llama-3_1-8b-instruct-e2e-test.nightly-ndsv2-2025-01-07@1736266189.8198516" }, "base_model": "meta/llama-3_1-8b-instruct", "peft": { "finetuning_type": "lora" }, "schema_version": "1.0", "project": "customizer", "custom_fields": {} }, { "created_at": "2025-01-07T16:51:36.891808", "updated_at": "2025-01-07T16:51:36.891810", "name": "test-example-model@agv1", "namespace": "default", "description": "None", "spec": { "num_parameters": 8000000000, "context_size": 4096, "num_virtual_tokens": 0, "is_chat": false }, "artifact": { "gpu_arch": "Ampere", "precision": "bf16", "tensor_parallelism": 1, "backend_engine": "nemo", "status": "created", "files_url": "hf://default/test-example-model@agv1" }, "base_model": 
"meta/llama-3.1-8b-instruct", "peft": { "finetuning_type": "lora" }, "schema_version": "1.0", "project": "customizer", "custom_fields": {} }, { "created_at": "2025-01-07T16:52:07.544858", "updated_at": "2025-01-07T16:52:07.544860", "name": "test-example-model@agv2", "namespace": "default", "description": "None", "spec": { "num_parameters": 8000000000, "context_size": 4096, "num_virtual_tokens": 0, "is_chat": false }, "artifact": { "gpu_arch": "Ampere", "precision": "bf16", "tensor_parallelism": 1, "backend_engine": "nemo", "status": "created", "files_url": "hf://default/test-example-model@agv2" }, "base_model": "meta/llama-3.1-8b-instruct", "peft": { "finetuning_type": "lora" }, "schema_version": "1.0", "project": "customizer", "custom_fields": {} }, { "created_at": "2025-01-07T22:40:49.220257", "updated_at": "2025-01-07T22:40:49.220258", "name": "meta-llama-3_1-8b-instruct-dataset-3-lora@cust-KhiGQ5WQGBDCe3QyDczW5K", "namespace": "default", "description": "Medical", "spec": { "num_parameters": 8000000000, "context_size": 4096, "num_virtual_tokens": 0, "is_chat": false }, "artifact": { "gpu_arch": "Ampere", "precision": "bf16", "tensor_parallelism": 1, "backend_engine": "nemo", "status": "upload_completed", "files_url": "hf://default/meta-llama-3_1-8b-instruct-dataset-3-lora@cust-KhiGQ5WQGBDCe3QyDczW5K" }, "base_model": "meta/llama-3_1-8b-instruct", "peft": { "finetuning_type": "lora" }, "schema_version": "1.0", "project": "customizer", "custom_fields": {} }, { "created_at": "2025-01-08T04:07:38.330000", "updated_at": "2025-01-08T04:07:38.330002", "name": "model-C6S3JuwuTj8VXcVjUemTJY", "namespace": "default", "description": "A model created by E2E test suite.", "base_model": "meta/llama-3.1-70b-instruct", "prompt": { "inference_params": { "temperature": 1.0, "max_tokens": 128 } }, "schema_version": "1.0", "project": "project-TbUgg5qAme3hBCLBVeRFdG", "custom_fields": {} }, { "created_at": "2025-01-08T11:18:44.703602", "updated_at": "2025-01-08T11:18:44.703604", 
"name": "test-cust@v2", "namespace": "user1", "description": "None", "spec": { "num_parameters": 8000000000, "context_size": 4096, "num_virtual_tokens": 0, "is_chat": false }, "artifact": { "gpu_arch": "Ampere", "precision": "bf16", "tensor_parallelism": 1, "backend_engine": "nemo", "status": "upload_completed", "files_url": "hf://user1/test-cust@v2" }, "base_model": "gpt8b-4k", "peft": { "finetuning_type": "lora" }, "schema_version": "1.0", "project": "customizer", "custom_fields": {} }, { "created_at": "2025-01-08T11:27:07.989030", "updated_at": "2025-01-08T11:27:07.989031", "name": "test-cust@v3", "namespace": "user1", "description": "None", "spec": { "num_parameters": 8000000000, "context_size": 4096, "num_virtual_tokens": 0, "is_chat": false }, "artifact": { "gpu_arch": "Ampere", "precision": "bf16", "tensor_parallelism": 1, "backend_engine": "nemo", "status": "upload_completed", "files_url": "hf://user1/test-cust@v3" }, "base_model": "gpt8b-4k", "peft": { "finetuning_type": "lora" }, "schema_version": "1.0", "project": "customizer", "custom_fields": {} } ], "pagination": { "page": 1, "page_size": 10, "current_page_size": 10, "total_pages": 73, "total_results": 726 }, "sort": "created_at" }