> For clean Markdown of any page, append .md to the page URL.
> For a complete documentation index, see https://docs.nvidia.com/nemo/guardrails/llms.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://docs.nvidia.com/nemo/guardrails/_mcp/server.

# Create Chat Completion

POST http://localhost:8000/v1/chat/completions
Content-Type: application/json

Generate a chat completion with guardrails applied.

The request shape is compatible with the OpenAI Chat Completions API and
accepts Guardrails-specific options in the `guardrails` object.


Reference: https://docs.nvidia.com/nemo/guardrails/nemo/guardrails/reference/guardrails-api-server/chat-completions/chat-completions

## OpenAPI Specification

```yaml
openapi: 3.1.0
info:
  title: NVIDIA NeMo Guardrails Library API Server
  version: 1.0.0
paths:
  /v1/chat/completions:
    post:
      operationId: create-guardrails-chat-completion
      summary: Create a guarded chat completion
      description: |
        Generate a chat completion with guardrails applied.

        The request shape is compatible with the OpenAI Chat Completions API and
        accepts Guardrails-specific options in the `guardrails` object.
      tags:
        - subpackage_chatCompletions
      responses:
        '200':
          description: Chat completion response or server-sent event stream.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GuardrailsChatCompletion'
        '422':
          description: Invalid request or unsupported state continuation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GuardrailsChatCompletionRequest'
servers:
  - url: http://localhost:8000
    description: Local Guardrails server
components:
  schemas:
    ChatMessageContent:
      oneOf:
        - type: string
        - type: array
          items:
            type: object
            additionalProperties:
              description: Any type
      description: Message content.
      title: ChatMessageContent
    ChatMessage:
      type: object
      properties:
        role:
          type: string
          description: >-
            Message role, such as `system`, `user`, `assistant`, `tool`, or
            `context`.
        content:
          $ref: '#/components/schemas/ChatMessageContent'
          description: Message content.
      required:
        - role
        - content
      title: ChatMessage
    GuardrailsChatCompletionRequestStop:
      oneOf:
        - type: string
        - type: array
          items:
            type: string
      description: Stop sequence or sequences.
      title: GuardrailsChatCompletionRequestStop
    RailSelection:
      oneOf:
        - type: boolean
        - type: array
          items:
            type: string
      description: Enable, disable, or select named rails.
      title: RailSelection
    GenerationRailsOptions:
      type: object
      properties:
        input:
          $ref: '#/components/schemas/RailSelection'
        output:
          $ref: '#/components/schemas/RailSelection'
        retrieval:
          $ref: '#/components/schemas/RailSelection'
        dialog:
          type: boolean
          default: true
          description: Enable dialog rails.
        tool_input:
          $ref: '#/components/schemas/RailSelection'
        tool_output:
          $ref: '#/components/schemas/RailSelection'
      title: GenerationRailsOptions
    GenerationOptionsOutputVars:
      oneOf:
        - type: boolean
        - type: array
          items:
            type: string
      description: Context variables to return.
      title: GenerationOptionsOutputVars
    GenerationLogOptions:
      type: object
      properties:
        activated_rails:
          type: boolean
          default: false
          description: Include information about activated rails.
        llm_calls:
          type: boolean
          default: false
          description: Include details about LLM calls.
        internal_events:
          type: boolean
          default: false
          description: Include internal generated events.
        colang_history:
          type: boolean
          default: false
          description: Include conversation history in Colang format.
      title: GenerationLogOptions
    GenerationOptions:
      type: object
      properties:
        rails:
          $ref: '#/components/schemas/GenerationRailsOptions'
        llm_params:
          type: object
          additionalProperties:
            description: Any type
          description: Additional parameters to pass to the LLM call.
        llm_output:
          type: boolean
          default: false
          description: Include custom LLM output in the response.
        output_vars:
          $ref: '#/components/schemas/GenerationOptionsOutputVars'
          description: Context variables to return.
        log:
          $ref: '#/components/schemas/GenerationLogOptions'
      title: GenerationOptions
    GuardrailsRequestOptions:
      type: object
      properties:
        config_id:
          type: string
          description: >-
            Guardrails configuration ID to use. Mutually exclusive with
            `config_ids`.
        config_ids:
          type: array
          items:
            type: string
          description: >-
            List of configuration IDs to combine. Mutually exclusive with
            `config_id`.
        thread_id:
          type: string
          description: Existing thread ID for Colang 1.0 conversation persistence.
        context:
          type: object
          additionalProperties:
            description: Any type
          description: Additional context data for the conversation.
        options:
          $ref: '#/components/schemas/GenerationOptions'
        state:
          type: object
          additionalProperties:
            description: Any type
          description: Colang 1.0 transcript state for continuing a previous interaction.
      description: Guardrails-specific request options.
      title: GuardrailsRequestOptions
    GuardrailsChatCompletionRequest:
      type: object
      properties:
        model:
          type: string
          description: LLM model to use for the completion.
        messages:
          type: array
          items:
            $ref: '#/components/schemas/ChatMessage'
          description: Chat messages in the current conversation.
        stream:
          type: boolean
          default: false
          description: Return partial message deltas as server-sent events.
        max_tokens:
          type: integer
          description: Maximum number of tokens to generate.
        temperature:
          type: number
          format: double
          description: Sampling temperature.
        top_p:
          type: number
          format: double
          description: Top-p sampling parameter.
        stop:
          $ref: '#/components/schemas/GuardrailsChatCompletionRequestStop'
          description: Stop sequence or sequences.
        presence_penalty:
          type: number
          format: double
          description: Presence penalty parameter.
        frequency_penalty:
          type: number
          format: double
          description: Frequency penalty parameter.
        function_call:
          type: object
          additionalProperties:
            description: Any type
          description: Function call parameter.
        logit_bias:
          type: object
          additionalProperties:
            description: Any type
          description: Logit bias parameter.
        logprobs:
          type: boolean
          description: Log probabilities parameter.
        guardrails:
          $ref: '#/components/schemas/GuardrailsRequestOptions'
      required:
        - model
      title: GuardrailsChatCompletionRequest
    ChatCompletionChoice:
      type: object
      properties:
        index:
          type: integer
        message:
          $ref: '#/components/schemas/ChatMessage'
        finish_reason:
          type:
            - string
            - 'null'
      title: ChatCompletionChoice
    GuardrailsResponseData:
      type: object
      properties:
        config_id:
          type:
            - string
            - 'null'
        state:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
        llm_output:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
        output_data:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
        log:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
      title: GuardrailsResponseData
    GuardrailsChatCompletion:
      type: object
      properties:
        id:
          type: string
        object:
          type: string
        created:
          type: integer
        model:
          type: string
        choices:
          type: array
          items:
            $ref: '#/components/schemas/ChatCompletionChoice'
        guardrails:
          $ref: '#/components/schemas/GuardrailsResponseData'
      title: GuardrailsChatCompletion
    ErrorResponseDetail:
      oneOf:
        - type: string
        - type: object
          additionalProperties:
            description: Any type
      description: Error detail.
      title: ErrorResponseDetail
    ErrorResponse:
      type: object
      properties:
        detail:
          $ref: '#/components/schemas/ErrorResponseDetail'
          description: Error detail.
      title: ErrorResponse

```

## Examples

### Basic guarded completion


**Request**

```json
{
  "model": "meta/llama-3.1-8b-instruct",
  "messages": [
    {
      "role": "user",
      "content": "What is the capital of France?"
    }
  ],
  "guardrails": {
    "config_id": "content_safety"
  }
}
```

**Response**

```json
{
  "id": "chatcmpl-abc123",
  "object": "chat.completion",
  "created": 1709424000,
  "model": "meta/llama-3.1-8b-instruct",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Paris is the capital of France."
      },
      "finish_reason": "stop"
    }
  ],
  "guardrails": {
    "config_id": "content_safety",
    "state": null,
    "llm_output": null,
    "output_data": null,
    "log": null
  }
}
```

**SDK Code**

```python Basic guarded completion
import requests

# Chat-completions endpoint of the local Guardrails server.
ENDPOINT = "http://localhost:8000/v1/chat/completions"

# A single user turn, guarded by the `content_safety` configuration.
user_turn = {
    "role": "user",
    "content": "What is the capital of France?"
}
body = {
    "model": "meta/llama-3.1-8b-instruct",
    "messages": [user_turn],
    "guardrails": {"config_id": "content_safety"},
}

# POST the JSON body and print the decoded JSON reply.
reply = requests.post(
    ENDPOINT,
    json=body,
    headers={"Content-Type": "application/json"},
)
print(reply.json())
```

```javascript Basic guarded completion
// Chat-completions endpoint of the local Guardrails server.
const endpoint = 'http://localhost:8000/v1/chat/completions';

// POST request carrying the pre-serialized JSON body.
const requestInit = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: '{"model":"meta/llama-3.1-8b-instruct","messages":[{"role":"user","content":"What is the capital of France?"}],"guardrails":{"config_id":"content_safety"}}'
};

// Send the request and log the parsed JSON reply; log any failure instead.
try {
  const reply = await fetch(endpoint, requestInit);
  console.log(await reply.json());
} catch (err) {
  console.error(err);
}
```

```go Basic guarded completion
package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the example chat-completion request to the local Guardrails
// server and prints the response value followed by the response body.
// Any error is printed and ends the program early.
func main() {

	url := "http://localhost:8000/v1/chat/completions"

	payload := strings.NewReader("{\n  \"model\": \"meta/llama-3.1-8b-instruct\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"What is the capital of France?\"\n    }\n  ],\n  \"guardrails\": {\n    \"config_id\": \"content_safety\"\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println(err)
		return
	}

	req.Header.Add("Content-Type", "application/json")

	// Do returns a nil response on failure, so err must be checked before
	// res.Body is touched.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Println(res)
	fmt.Println(string(body))

}
```

```ruby Basic guarded completion
require 'uri'
require 'net/http'

# Chat-completions endpoint of the local Guardrails server.
endpoint = URI("http://localhost:8000/v1/chat/completions")

# Build the POST request with its JSON body.
post = Net::HTTP::Post.new(endpoint)
post["Content-Type"] = 'application/json'
post.body = "{\n  \"model\": \"meta/llama-3.1-8b-instruct\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"What is the capital of France?\"\n    }\n  ],\n  \"guardrails\": {\n    \"config_id\": \"content_safety\"\n  }\n}"

# Send the request and print the response body.
connection = Net::HTTP.new(endpoint.host, endpoint.port)
puts connection.request(post).read_body
```

```java Basic guarded completion
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

// Endpoint and JSON body for the guarded chat completion.
String endpoint = "http://localhost:8000/v1/chat/completions";
String requestBody = "{\n  \"model\": \"meta/llama-3.1-8b-instruct\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"What is the capital of France?\"\n    }\n  ],\n  \"guardrails\": {\n    \"config_id\": \"content_safety\"\n  }\n}";

// POST the body as JSON and read the reply as a string.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();
```

```php Basic guarded completion
<?php
require_once('vendor/autoload.php');

// Endpoint and JSON body for the guarded chat completion.
$endpoint = 'http://localhost:8000/v1/chat/completions';
$payload = '{
  "model": "meta/llama-3.1-8b-instruct",
  "messages": [
    {
      "role": "user",
      "content": "What is the capital of France?"
    }
  ],
  "guardrails": {
    "config_id": "content_safety"
  }
}';

// POST the body as JSON and print the response body.
$http = new \GuzzleHttp\Client();
$reply = $http->request('POST', $endpoint, [
  'body' => $payload,
  'headers' => [
    'Content-Type' => 'application/json',
  ],
]);

echo $reply->getBody();
```

```csharp Basic guarded completion
using RestSharp;

// Endpoint and JSON body for the guarded chat completion.
var endpoint = "http://localhost:8000/v1/chat/completions";
var jsonBody = "{\n  \"model\": \"meta/llama-3.1-8b-instruct\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"What is the capital of France?\"\n    }\n  ],\n  \"guardrails\": {\n    \"config_id\": \"content_safety\"\n  }\n}";

// Build the POST request first, then execute it against the endpoint.
var request = new RestRequest(Method.POST);
request.AddHeader("Content-Type", "application/json");
request.AddParameter("application/json", jsonBody, ParameterType.RequestBody);

var client = new RestClient(endpoint);
IRestResponse response = client.Execute(request);
```

```swift Basic guarded completion
import Foundation

// Request headers and JSON body for the guarded chat completion.
let headers = ["Content-Type": "application/json"]
let parameters: [String: Any] = [
  "model": "meta/llama-3.1-8b-instruct",
  "messages": [
    [
      "role": "user",
      "content": "What is the capital of France?"
    ]
  ],
  "guardrails": ["config_id": "content_safety"]
]

// `JSONSerialization.data(withJSONObject:options:)` throws, so the call must be
// marked with `try`; an encoding failure here is a programmer error.
let postData: Data
do {
  postData = try JSONSerialization.data(withJSONObject: parameters, options: [])
} catch {
  fatalError("Could not encode the request body: \(error)")
}

guard let url = URL(string: "http://localhost:8000/v1/chat/completions") else {
  fatalError("Invalid endpoint URL")
}

var request = URLRequest(url: url,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "POST"
request.allHTTPHeaderFields = headers
request.httpBody = postData

// The completion handler runs asynchronously once the whole response has arrived.
// NOTE(review): when run as a command-line script, the process must be kept alive
// until the handler fires — confirm for your environment.
let session = URLSession.shared
let dataTask = session.dataTask(with: request) { data, response, error in
  if let error = error {
    print(error)
    return
  }
  if let httpResponse = response as? HTTPURLResponse {
    print(httpResponse)
  }
  // Print the response body as well, as the other language examples do.
  if let data = data, let body = String(data: data, encoding: .utf8) {
    print(body)
  }
}

dataTask.resume()
```

### Streaming guarded completion


**Request**

```json
{
  "model": "meta/llama-3.1-8b-instruct",
  "messages": [
    {
      "role": "user",
      "content": "Tell me a short story."
    }
  ],
  "stream": true,
  "guardrails": {
    "config_id": "content_safety"
  }
}
```

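**Response**

> **Note:** With `"stream": true`, the server returns a server-sent event stream rather than a single JSON document. The JSON below repeats the non-streaming response body from the basic example to illustrate the response schema; it is not the literal streaming output.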

```json
{
  "id": "chatcmpl-abc123",
  "object": "chat.completion",
  "created": 1709424000,
  "model": "meta/llama-3.1-8b-instruct",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Paris is the capital of France."
      },
      "finish_reason": "stop"
    }
  ],
  "guardrails": {
    "config_id": "content_safety",
    "state": null,
    "llm_output": null,
    "output_data": null,
    "log": null
  }
}
```

**SDK Code**

```python Streaming guarded completion
import requests

url = "http://localhost:8000/v1/chat/completions"

payload = {
    "model": "meta/llama-3.1-8b-instruct",
    "messages": [
        {
            "role": "user",
            "content": "Tell me a short story."
        }
    ],
    "stream": True,
    "guardrails": { "config_id": "content_safety" }
}
headers = {"Content-Type": "application/json"}

# With "stream": True the server replies with a server-sent event stream, which
# is not a single JSON document, so `response.json()` would fail. Pass
# `stream=True` to requests and print each non-empty line as it arrives.
with requests.post(url, json=payload, headers=headers, stream=True) as response:
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))
```

```javascript Streaming guarded completion
const url = 'http://localhost:8000/v1/chat/completions';
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: '{"model":"meta/llama-3.1-8b-instruct","messages":[{"role":"user","content":"Tell me a short story."}],"stream":true,"guardrails":{"config_id":"content_safety"}}'
};

try {
  const response = await fetch(url, options);

  // With "stream": true the body is a server-sent event stream, not a single
  // JSON document, so `response.json()` would reject. Read the body chunk by
  // chunk and log the decoded text as it arrives.
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  while (true) {
    const {done, value} = await reader.read();
    if (done) break;
    console.log(decoder.decode(value, {stream: true}));
  }
} catch (error) {
  console.error(error);
}
```

```go Streaming guarded completion
package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts the example streaming chat-completion request to the local
// Guardrails server and prints the response value followed by the response
// body. Any error is printed and ends the program early.
func main() {

	url := "http://localhost:8000/v1/chat/completions"

	payload := strings.NewReader("{\n  \"model\": \"meta/llama-3.1-8b-instruct\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Tell me a short story.\"\n    }\n  ],\n  \"stream\": true,\n  \"guardrails\": {\n    \"config_id\": \"content_safety\"\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println(err)
		return
	}

	req.Header.Add("Content-Type", "application/json")

	// Do returns a nil response on failure, so err must be checked before
	// res.Body is touched.
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	// ReadAll returns once the server closes the stream.
	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Println(res)
	fmt.Println(string(body))

}
```

```ruby Streaming guarded completion
require 'uri'
require 'net/http'

url = URI("http://localhost:8000/v1/chat/completions")

http = Net::HTTP.new(url.host, url.port)

request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request.body = "{\n  \"model\": \"meta/llama-3.1-8b-instruct\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Tell me a short story.\"\n    }\n  ],\n  \"stream\": true,\n  \"guardrails\": {\n    \"config_id\": \"content_safety\"\n  }\n}"

# With "stream": true the server sends a server-sent event stream. Passing a
# block to `request` yields the response before its body is read, and the block
# form of `read_body` yields each chunk as it arrives instead of buffering the
# whole stream.
http.request(request) do |response|
  response.read_body do |chunk|
    print chunk
  end
end
```

```java Streaming guarded completion
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

// Endpoint and JSON body for the streaming guarded chat completion.
String endpoint = "http://localhost:8000/v1/chat/completions";
String requestBody = "{\n  \"model\": \"meta/llama-3.1-8b-instruct\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Tell me a short story.\"\n    }\n  ],\n  \"stream\": true,\n  \"guardrails\": {\n    \"config_id\": \"content_safety\"\n  }\n}";

// POST the body as JSON and read the reply as a string.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("Content-Type", "application/json")
  .body(requestBody)
  .asString();
```

```php Streaming guarded completion
<?php
require_once('vendor/autoload.php');

// Endpoint and JSON body for the streaming guarded chat completion.
$endpoint = 'http://localhost:8000/v1/chat/completions';
$payload = '{
  "model": "meta/llama-3.1-8b-instruct",
  "messages": [
    {
      "role": "user",
      "content": "Tell me a short story."
    }
  ],
  "stream": true,
  "guardrails": {
    "config_id": "content_safety"
  }
}';

// POST the body as JSON and print the response body.
$http = new \GuzzleHttp\Client();
$reply = $http->request('POST', $endpoint, [
  'body' => $payload,
  'headers' => [
    'Content-Type' => 'application/json',
  ],
]);

echo $reply->getBody();
```

```csharp Streaming guarded completion
using RestSharp;

// Endpoint and JSON body for the streaming guarded chat completion.
var endpoint = "http://localhost:8000/v1/chat/completions";
var jsonBody = "{\n  \"model\": \"meta/llama-3.1-8b-instruct\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Tell me a short story.\"\n    }\n  ],\n  \"stream\": true,\n  \"guardrails\": {\n    \"config_id\": \"content_safety\"\n  }\n}";

// Build the POST request first, then execute it against the endpoint.
var request = new RestRequest(Method.POST);
request.AddHeader("Content-Type", "application/json");
request.AddParameter("application/json", jsonBody, ParameterType.RequestBody);

var client = new RestClient(endpoint);
IRestResponse response = client.Execute(request);
```

```swift Streaming guarded completion
import Foundation

// Request headers and JSON body for the streaming guarded chat completion.
let headers = ["Content-Type": "application/json"]
let parameters: [String: Any] = [
  "model": "meta/llama-3.1-8b-instruct",
  "messages": [
    [
      "role": "user",
      "content": "Tell me a short story."
    ]
  ],
  "stream": true,
  "guardrails": ["config_id": "content_safety"]
]

// `JSONSerialization.data(withJSONObject:options:)` throws, so the call must be
// marked with `try`; an encoding failure here is a programmer error.
let postData: Data
do {
  postData = try JSONSerialization.data(withJSONObject: parameters, options: [])
} catch {
  fatalError("Could not encode the request body: \(error)")
}

guard let url = URL(string: "http://localhost:8000/v1/chat/completions") else {
  fatalError("Invalid endpoint URL")
}

var request = URLRequest(url: url,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "POST"
request.allHTTPHeaderFields = headers
request.httpBody = postData

// A data task delivers the body only after the response has finished, so the
// handler receives the complete event stream at once rather than piece by piece.
// NOTE(review): when run as a command-line script, the process must be kept alive
// until the handler fires — confirm for your environment.
let session = URLSession.shared
let dataTask = session.dataTask(with: request) { data, response, error in
  if let error = error {
    print(error)
    return
  }
  if let httpResponse = response as? HTTPURLResponse {
    print(httpResponse)
  }
  // Print the raw response body as well, as the other language examples do.
  if let data = data, let body = String(data: data, encoding: .utf8) {
    print(body)
  }
}

dataTask.resume()
```