> For clean Markdown of any page, append .md to the page URL.
> For a complete documentation index, see https://docs.nvidia.com/nemo-platform/llms.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://docs.nvidia.com/nemo-platform/_mcp/server.

# Create Model

POST https://host.com/apis/models/v2/workspaces/{workspace}/models
Content-Type: application/json

Create a new model entity.

This endpoint creates a new Model Entity in the Models service database.
The Model Entity will be registered for use within the platform.

Reference: https://docs.nvidia.com/nemo-platform/nemo-platform/documentation/reference/api-reference/models/create-model-apis-models-v-2-workspaces-workspace-models-post

## OpenAPI Specification

```yaml
openapi: 3.1.0
info:
  title: Nemo Platform API
  version: 1.0.0
paths:
  /apis/models/v2/workspaces/{workspace}/models:
    post:
      operationId: create-model-apis-models-v-2-workspaces-workspace-models-post
      summary: Create Model
      description: |-
        Create a new model entity.

        This endpoint creates a new Model Entity in the Models service database.
        The Model Entity will be registered for use within the platform.
      tags:
        - subpackage_models
      parameters:
        - name: workspace
          in: path
          required: true
          schema:
            type: string
      responses:
        '201':
          description: Create a new model entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelEntity'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateModelEntityRequest'
servers:
  - url: https://host.com
    description: Default
components:
  schemas:
    MoEConfig:
      type: object
      properties:
        num_experts:
          type: integer
          description: Total number of routed experts (sharded by EP)
        num_experts_per_tok:
          type: integer
          description: Number of experts activated per token (top-k routing)
        num_expert_layers:
          type: integer
          description: Number of layers with MoE
        expert_ffn_size:
          type: integer
          description: FFN size for experts (if different from main FFN)
        num_shared_experts:
          type: integer
          default: 0
          description: Number of shared experts (replicated, not sharded by EP)
      required:
        - num_experts
        - num_experts_per_tok
        - num_expert_layers
      description: Mixture of Experts configuration.
      title: MoEConfig
    MambaConfig:
      type: object
      properties:
        is_hybrid:
          type: boolean
          description: Whether model is Mamba-Transformer hybrid
        num_mamba_layers:
          type: integer
          description: Number of Mamba/SSM layers
        num_attention_layers:
          type: integer
          default: 0
          description: Number of attention layers (for hybrids)
        num_mlp_layers:
          type: integer
          default: 0
          description: Number of standalone MLP layers (for interleaved architectures)
        state_size:
          type: integer
          default: 16
          description: SSM state expansion factor (d_state)
        conv_kernel:
          type: integer
          default: 4
          description: Convolution kernel size for Mamba (d_conv)
      required:
        - is_hybrid
        - num_mamba_layers
      description: Mamba/State Space Model configuration.
      title: MambaConfig
    SlidingWindowConfig:
      type: object
      properties:
        window_size:
          type: integer
          description: Sliding window size (attends to last N tokens)
      required:
        - window_size
      description: Sliding window attention configuration.
      title: SlidingWindowConfig
    LinearLayerSpec:
      type: object
      properties:
        name:
          type: string
          description: Module name (e.g., 'model.layers.0.self_attn.q_proj')
        in_features:
          type: integer
          description: Input feature dimension
        out_features:
          type: integer
          description: Output feature dimension
      required:
        - name
        - in_features
        - out_features
      description: Specification for a single linear layer in the model.
      title: LinearLayerSpec
    ToolCallConfig:
      type: object
      properties:
        tool_call_parser:
          type: string
          description: >-
            Name of the tool call parser to use (e.g., 'openai', 'hermes',
            'pythonic', 'llama3_json', 'mistral').
        tool_call_plugin:
          type: string
          description: >-
            Reference to a fileset containing the custom tool call plugin Python
            file. Expected format: '{workspace}/{fileset_name}'. The fileset is
            mounted separately from the model checkpoint at deployment time.
        auto_tool_choice:
          type: boolean
          description: >-
            Whether to enable automatic tool choice. When enabled, the model can
            decide to call tools without explicit user instruction.
      description: Configuration for tool calling support in NIM deployments.
      title: ToolCallConfig
    ModelSpec:
      type: object
      properties:
        context_size:
          type: integer
          description: Context window size
        num_virtual_tokens:
          type: integer
          description: Number of virtual tokens for prompt tuning
        is_chat:
          type: boolean
          description: Whether this is a chat model
        is_embedding_model:
          type: boolean
          default: false
          description: Whether this is an embedding model
        checkpoint_model_name:
          type: string
          description: Checkpoint Model identifier or model path
        family:
          type: string
          description: Model architecture family (e.g., 'llama', 'mixtral', 'gpt2')
        num_layers:
          type: integer
          description: Number of transformer layers
        hidden_size:
          type: integer
          description: Hidden dimension size
        num_attention_heads:
          type: integer
          description: Number of attention heads
        num_kv_heads:
          type: integer
          description: Number of key-value heads (for GQA/MQA)
        ffn_hidden_size:
          type: integer
          description: FFN intermediate size
        vocab_size:
          type: integer
          description: Vocabulary size
        tied_embeddings:
          type: boolean
          description: Whether embeddings are tied
        gated_mlp:
          type: boolean
          description: Whether MLP uses gated activation
        base_num_parameters:
          type: integer
          description: Total model parameters
        precision:
          type: string
          description: >-
            Model precision (e.g., 'float16', 'bfloat16', 'float32', 'int8',
            'int4')
        moe_config:
          $ref: '#/components/schemas/MoEConfig'
          description: MoE configuration if applicable
        mamba_config:
          $ref: '#/components/schemas/MambaConfig'
          description: Mamba/SSM configuration if applicable
        sliding_window_config:
          $ref: '#/components/schemas/SlidingWindowConfig'
          description: Sliding window attention config if applicable
        linear_layers:
          type: array
          items:
            $ref: '#/components/schemas/LinearLayerSpec'
          description: >-
            List of all linear/Conv1D layers with their dimensions. Used for
            LoRA parameter estimation without requiring model instantiation.
            Each entry contains the module name, in_features, and out_features.
        chat_template:
          type: string
          description: >-
            Jinja2 chat template string for the model. Used by NIM to format
            chat completions. If not set, the model's built-in tokenizer
            template is used.
        tool_call_config:
          $ref: '#/components/schemas/ToolCallConfig'
          description: >-
            Tool calling configuration for NIM deployments. Controls how the
            model handles function/tool calling in chat completions.
        minimum_gpus_all_weights:
          type: integer
          description: >-
            Minimum GPUs required for full fine-tuning using default
            configurations.
        minimum_gpus_lora:
          type: integer
          description: >-
            Minimum GPUs required for LoRA fine-tuning using default
            configurations.
      required:
        - checkpoint_model_name
        - family
        - num_layers
        - hidden_size
        - num_attention_heads
        - num_kv_heads
        - ffn_hidden_size
        - vocab_size
        - tied_embeddings
        - gated_mlp
        - base_num_parameters
        - precision
      description: Detailed specification for a model.
      title: ModelSpec
    FinetuningType:
      type: string
      enum:
        - lora_merged
        - all_weights
        - last_layer
        - top_layers
        - gradual_unfreezing
        - bias_only
        - attention_only
        - lora
        - qlora
        - adalora
        - dora
        - lora_plus
        - prompt_tuning
        - prefix_tuning
        - p_tuning
        - p_tuning_v2
        - soft_prompt
        - ppo
        - dpo
        - cdpo
        - ipo
        - orpo
        - kto
        - rrhf
        - grpo
      description: Finetuning types.
      title: FinetuningType
    APIEndpointData:
      type: object
      properties:
        url:
          type: string
          format: uri
          description: Endpoint URL
        model_id:
          type: string
          description: Model identifier at the endpoint
        api_key:
          type: string
          description: API key for authentication
        format:
          type: string
          description: API format (e.g., openai, nvidia)
      description: Data about an inference endpoint.
      title: APIEndpointData
    BackendFormat:
      type: string
      enum:
        - OPENAI_CHAT
        - ANTHROPIC_MESSAGES
      description: >-
        Inference backend API wire formats understood by IGW and middleware
        plugins.
      title: BackendFormat
    InferenceParams:
      type: object
      properties:
        model:
          type: string
          description: Model identifier
        temperature:
          type: number
          format: double
          description: >-
            Float value between 0 and 1. A temperature of 0 indicates greedy
            decoding, where the token with the highest probability is chosen.
            Temperature cannot currently be set to 0.0.
        max_tokens:
          type: integer
          description: Max tokens to generate
        max_completion_tokens:
          type: integer
          description: Max tokens to generate
        top_p:
          type: number
          format: double
          description: >-
            Float value between 0 and 1; limits to the top tokens within a
            certain probability. top_p=0 means the model will only consider the
            single most likely token for the next prediction
        stop:
          type: array
          items:
            type: string
      description: >-
        Parameters for model inference. Extra fields can be supplied for
        additional options applied to the inference request directly. Fields not
        supported by the model may cause inference errors during evaluation.
      title: InferenceParams
    PromptData:
      type: object
      properties:
        system_prompt:
          type: string
          description: System prompt template
        icl_few_shot_examples:
          type: string
          description: In-context learning examples
        inference_params:
          $ref: '#/components/schemas/InferenceParams'
          description: Inference parameters that should be overridden.
        system_prompt_template:
          type: string
          description: >-
            The template which will be used to compile the final prompt used for
            prompting the LLM. Currently supports only {{icl_few_shot_examples}}
      description: Configuration for prompt engineering.
      title: PromptData
    CreateModelEntityRequest:
      type: object
      properties:
        name:
          type: string
          description: >-
            Name of the model entity. Allowed characters: letters (a-z, A-Z),
            digits (0-9), underscores, hyphens, and dots.
        project:
          type: string
          description: The URN of the project associated with this model entity
        description:
          type: string
          description: Optional description of the model
        spec:
          $ref: '#/components/schemas/ModelSpec'
          description: >-
            Detailed specification for the model - Automatically generated by
            the platform at creation when fileset provided.
        finetuning_type:
          $ref: '#/components/schemas/FinetuningType'
          description: Set for full weight finetuned models
        fileset:
          type: string
          description: >-
            A set of checkpoint files, configs, and other auxiliary info
            associated with this model - expected format
            {workspace}/{fileset_name}
        base_model:
          type: string
          description: Link to another model which is used as a base for the current model
        api_endpoint:
          $ref: '#/components/schemas/APIEndpointData'
          description: Data about the inference endpoint for this model
        backend_format:
          oneOf:
            - $ref: '#/components/schemas/BackendFormat'
            - type: 'null'
          description: >-
            Inference API wire format expected by the backend. If unset,
            inference routing treats the model as OPENAI_CHAT.
        prompt:
          $ref: '#/components/schemas/PromptData'
          description: Configuration for prompt engineering
        custom_fields:
          type: object
          additionalProperties:
            description: Any type
          description: Custom fields for additional metadata
        ownership:
          type: object
          additionalProperties:
            description: Any type
          description: Ownership information for the model
        model_providers:
          type: array
          items:
            type: string
          description: >-
            List of ModelProvider workspace/name resource names that provide
            inference for this Model Entity
        trust_remote_code:
          type: boolean
          default: false
          description: |-
            Whether to trust remote code for the checkpoint.
            Some models without support in certain libraries such as Transformers require additional custom Python code to execute.
            Due to the security ramifications of running arbitrary code, this can only be set to true under one of the following conditions:
            (1) the model's fileset's source is pre-approved in the platform config, or
            (2) the user creating this model is an administrator.
      required:
        - name
      description: Request model for creating a Model Entity.
      title: CreateModelEntityRequest
    Lora:
      type: object
      properties:
        alpha:
          type: integer
          description: Alpha scaling used for this adapter
        rank:
          type: integer
          description: LoRA Rank
      required:
        - rank
      title: Lora
    Adapter:
      type: object
      properties:
        name:
          type: string
          description: >-
            Name of the adapter. Name must be unique among all Adapters in the
            workspace. Allowed characters: letters (a-z, A-Z), digits (0-9),
            underscores, hyphens, and dots.
        workspace:
          type: string
          description: >-
            Workspace of the adapter. Allowed characters: letters (a-z, A-Z),
            digits (0-9), underscores, hyphens, and dots.
        description:
          type: string
          description: Optional description of the adapter
        fileset:
          type: string
          description: >-
            Fileset where the adapter files are stored. Expected format:
            {workspace}/{fileset_name}
        finetuning_type:
          $ref: '#/components/schemas/FinetuningType'
          description: Type of finetuning (LORA, P_TUNING, etc.)
        enabled:
          type: boolean
          default: true
          description: Whether to make this adapter available for inference post training
        lora_config:
          $ref: '#/components/schemas/Lora'
          description: Lora configuration specifics
        model:
          type: string
          description: >-
            Parent model entity reference. A single name (2-63 characters) or
            'workspace/model_name' where each segment is a valid name
            (lowercase, digits, hyphens, and temporarily @ . + _; no
            leading/trailing or consecutive hyphens). If one slash, both sides
            must be non-empty.
        created_at:
          type: string
          format: date-time
        updated_at:
          type: string
          format: date-time
      required:
        - name
        - workspace
        - fileset
        - finetuning_type
      title: Adapter
    ModelEntity:
      type: object
      properties:
        id:
          type: string
          description: Autogenerated id
        name:
          type: string
          description: >-
            Name of the entity. Name/workspace combo must be unique across all
            entities. Allowed characters: letters (a-z, A-Z), digits (0-9),
            underscores, hyphens, and dots.
        workspace:
          type: string
          description: >-
            The workspace of the entity. Allowed characters: letters (a-z, A-Z),
            digits (0-9), underscores, hyphens, and dots.
        project:
          type: string
          description: The URN of the project associated with this model entity.
        created_at:
          type: string
          format: date-time
          description: The timestamp of model entity creation
        updated_at:
          type: string
          format: date-time
          description: The timestamp of the last model entity update
        description:
          type: string
          description: Optional description of the model.
        spec:
          $ref: '#/components/schemas/ModelSpec'
          description: Detailed specification for the model
        finetuning_type:
          $ref: '#/components/schemas/FinetuningType'
          description: Set for full weight finetuned models
        fileset:
          type: string
          description: >-
            A set of checkpoint files, configs, and other auxiliary info
            associated with this model - expected format
            {workspace}/{fileset_name}
        trust_remote_code:
          type: boolean
          default: false
          description: Whether to trust remote code to load this model checkpoint.
        base_model:
          type: string
          description: Link to another model which is used as a base for the current model
        api_endpoint:
          $ref: '#/components/schemas/APIEndpointData'
          description: Data about the inference endpoint for this model
        backend_format:
          oneOf:
            - $ref: '#/components/schemas/BackendFormat'
            - type: 'null'
          description: >-
            Inference API wire format expected by the backend. If unset,
            inference routing treats the model as OPENAI_CHAT.
        adapters:
          type: array
          items:
            $ref: '#/components/schemas/Adapter'
          description: Adapters that have been created against this model
        prompt:
          $ref: '#/components/schemas/PromptData'
          description: Configuration for prompt engineering
        custom_fields:
          type: object
          additionalProperties:
            description: Any type
          description: Custom fields for additional metadata
        ownership:
          type: object
          additionalProperties:
            description: Any type
          description: Ownership information for the model
        model_providers:
          type: array
          items:
            type: string
          description: >-
            List of ModelProvider workspace/name resource names that provide
            inference for this Model Entity
      required:
        - id
        - name
        - workspace
        - created_at
        - updated_at
      description: >-
        Model Entity represents a versioned model registered within the
        platform.

        Uses EntityBase for entity store compatibility.
      title: ModelEntity
    ValidationErrorLocItems:
      oneOf:
        - type: string
        - type: integer
      title: ValidationErrorLocItems
    ValidationError:
      type: object
      properties:
        loc:
          type: array
          items:
            $ref: '#/components/schemas/ValidationErrorLocItems'
        msg:
          type: string
        type:
          type: string
        input:
          description: Any type
        ctx:
          type: object
          additionalProperties:
            description: Any type
      required:
        - loc
        - msg
        - type
      title: ValidationError
    HTTPValidationError:
      type: object
      properties:
        detail:
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
      title: HTTPValidationError

```

## Examples



**Request**

```json
{
  "name": "llama-3.1-8b"
}
```

**Response**

```json
{
  "id": "a1b2c3d4-e5f6-7890-ab12-cd34ef567890",
  "name": "llama-3.1-8b",
  "workspace": "research-team",
  "created_at": "2024-01-15T09:30:00Z",
  "updated_at": "2024-01-15T09:30:00Z",
  "project": "nlp/llama-upgrade",
  "description": "LLaMA 3.1 8 billion parameter model fine-tuned for conversational tasks.",
  "spec": {
    "checkpoint_model_name": "llama-3.1-8b-v1",
    "family": "llama",
    "num_layers": 32,
    "hidden_size": 5120,
    "num_attention_heads": 40,
    "num_kv_heads": 40,
    "ffn_hidden_size": 13824,
    "vocab_size": 32000,
    "tied_embeddings": true,
    "gated_mlp": false,
    "base_num_parameters": 8000000000,
    "precision": "float16",
    "context_size": 4096,
    "num_virtual_tokens": 20,
    "is_chat": true,
    "is_embedding_model": false,
    "moe_config": {
      "num_experts": 16,
      "num_experts_per_tok": 2,
      "num_expert_layers": 4,
      "expert_ffn_size": 16384,
      "num_shared_experts": 0
    },
    "mamba_config": {
      "is_hybrid": false,
      "num_mamba_layers": 0,
      "num_attention_layers": 0,
      "num_mlp_layers": 0,
      "state_size": 16,
      "conv_kernel": 4
    },
    "sliding_window_config": {
      "window_size": 512
    },
    "linear_layers": [
      {
        "name": "model.layers.0.self_attn.q_proj",
        "in_features": 5120,
        "out_features": 5120
      },
      {
        "name": "model.layers.0.mlp.fc1",
        "in_features": 5120,
        "out_features": 13824
      }
    ],
    "chat_template": "You are a helpful assistant. {{user_message}}",
    "tool_call_config": {
      "tool_call_parser": "openai",
      "tool_call_plugin": "tools/llama-toolkit",
      "auto_tool_choice": true
    },
    "minimum_gpus_all_weights": 4,
    "minimum_gpus_lora": 1
  },
  "finetuning_type": "lora_merged",
  "fileset": "research-team/llama-3.1-8b-checkpoints",
  "trust_remote_code": false,
  "base_model": "llama-3.0-8b",
  "api_endpoint": {
    "url": "https://api.nemo-platform.com/v1/models/llama-3.1-8b/infer",
    "model_id": "llama-3.1-8b",
    "api_key": "sk-abc123def456ghi789jkl",
    "format": "openai"
  },
  "backend_format": "OPENAI_CHAT",
  "adapters": [
    {
      "name": "lora-adapter-v1",
      "workspace": "research-team",
      "fileset": "research-team/lora-medical-v1",
      "finetuning_type": "lora_merged",
      "description": "LoRA adapter for domain-specific tuning on medical texts.",
      "enabled": true,
      "lora_config": {
        "rank": 8,
        "alpha": 16
      },
      "model": "research-team/llama-3.1-8b",
      "created_at": "2024-01-10T12:00:00Z",
      "updated_at": "2024-01-12T15:45:00Z"
    }
  ],
  "prompt": {
    "system_prompt": "You are a helpful assistant specialized in technical support.",
    "icl_few_shot_examples": "User: How do I reset my password?\nAssistant: To reset your password, go to the settings page and click 'Reset Password'.",
    "inference_params": {
      "model": "llama-3.1-8b",
      "temperature": 0.7,
      "max_tokens": 512,
      "max_completion_tokens": 256,
      "top_p": 0.9,
      "stop": [
        "\nUser:",
        "\nAssistant:"
      ]
    },
    "system_prompt_template": "System: {{system_prompt}}\nExamples:\n{{icl_few_shot_examples}}"
  },
  "custom_fields": {},
  "ownership": {},
  "model_providers": [
    "research-team/llama-provider-v1"
  ]
}
```

**SDK Code**

```python
import requests

# Replace the literal "workspace" path segment with the target workspace name.
url = "https://host.com/apis/models/v2/workspaces/workspace/models"

# "name" is the only required field of the create-model request body.
payload = { "name": "llama-3.1-8b" }
headers = {"Content-Type": "application/json"}

# requests has no default timeout; without one this call can block forever
# if the server never responds.
response = requests.post(url, json=payload, headers=headers, timeout=10)

# 201 returns the created model entity; 422 returns validation error details.
print(response.json())
```

```javascript
// Create a model entity by POSTing a JSON body to the workspace's models collection.
const endpoint = 'https://host.com/apis/models/v2/workspaces/workspace/models';
const requestInit = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: '{"name":"llama-3.1-8b"}'
};

try {
  const res = await fetch(endpoint, requestInit);
  // Parse the response body as JSON and log it.
  console.log(await res.json());
} catch (err) {
  // Failures from either the fetch or the JSON parsing are reported here.
  console.error(err);
}
```

```go
package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends the Create Model request and prints the response followed by
// its body. Any failure is reported and ends the program early instead of
// being silently discarded.
func main() {

	url := "https://host.com/apis/models/v2/workspaces/workspace/models"

	// "name" is the only required field of the create-model request body.
	payload := strings.NewReader("{\n  \"name\": \"llama-3.1-8b\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil when Do fails, so return before touching res.Body.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(res)
	fmt.Println(string(body))

}
```

```ruby
require 'uri'
require 'net/http'

# POST the create-model payload over HTTPS and print the raw response body.
endpoint = URI("https://host.com/apis/models/v2/workspaces/workspace/models")

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = 'application/json'
post.body = "{\n  \"name\": \"llama-3.1-8b\"\n}"

puts client.request(post).read_body
```

```java
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

// Create a model entity: POST a JSON body containing the model name to the
// workspace's models collection and read the response back as a String.
HttpResponse<String> response = Unirest.post("https://host.com/apis/models/v2/workspaces/workspace/models")
  .header("Content-Type", "application/json")
  .body("{\n  \"name\": \"llama-3.1-8b\"\n}")
  .asString();
```

```php
<?php
require_once('vendor/autoload.php');

// Create a model entity: POST a JSON body naming the model.
$endpoint = 'https://host.com/apis/models/v2/workspaces/workspace/models';

// Request options: raw JSON body plus the matching Content-Type header.
$options = [
  'body' => '{
  "name": "llama-3.1-8b"
}',
  'headers' => [
    'Content-Type' => 'application/json',
  ],
];

$http = new \GuzzleHttp\Client();
$result = $http->request('POST', $endpoint, $options);

// Print the response body.
echo $result->getBody();
```

```csharp
using RestSharp;

// Client bound to the Create Model endpoint URL.
var client = new RestClient("https://host.com/apis/models/v2/workspaces/workspace/models");
// Build a POST request that carries the JSON payload as the request body.
var request = new RestRequest(Method.POST);
request.AddHeader("Content-Type", "application/json");
request.AddParameter("application/json", "{\n  \"name\": \"llama-3.1-8b\"\n}", ParameterType.RequestBody);
// Send the request and keep the response for inspection.
IRestResponse response = client.Execute(request);

```swift
import Foundation

// Request headers: the body is sent as JSON.
let headers = ["Content-Type": "application/json"]
// "name" is the only required field of the create-model request body.
let parameters: [String: Any] = ["name": "llama-3.1-8b"]

// The URL is a fixed literal, so failing to parse it is a programmer error.
guard let url = URL(string: "https://host.com/apis/models/v2/workspaces/workspace/models") else {
  fatalError("Invalid Create Model endpoint URL")
}

do {
  // JSONSerialization.data(withJSONObject:options:) throws, so it must be
  // called with `try`; encoding failures are reported in the catch below.
  let postData = try JSONSerialization.data(withJSONObject: parameters, options: [])

  var request = URLRequest(url: url,
                           cachePolicy: .useProtocolCachePolicy,
                           timeoutInterval: 10.0)
  request.httpMethod = "POST"
  request.allHTTPHeaderFields = headers
  request.httpBody = postData

  let session = URLSession.shared
  let dataTask = session.dataTask(with: request) { _, response, error in
    // Unwrap before printing so the output is not wrapped in "Optional(...)".
    if let error = error {
      print(error)
    } else if let httpResponse = response as? HTTPURLResponse {
      print(httpResponse)
    }
  }

  dataTask.resume()
} catch {
  print("Failed to encode request body: \(error)")
}
```