nemo_microservices.resources.v2.inference.deployments.deployments#

Module Contents#

Classes#

API#

class nemo_microservices.resources.v2.inference.deployments.deployments.AsyncDeploymentsResource(
client: nemo_microservices._client.AsyncNeMoMicroservices,
)#

Bases: nemo_microservices._resource.AsyncAPIResource

Initialization

async create(
*,
config: str,
name: str,
config_version: int | nemo_microservices._types.Omit = omit,
hf_token: str | nemo_microservices._types.Omit = omit,
namespace: str | nemo_microservices._types.Omit = omit,
project: str | nemo_microservices._types.Omit = omit,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.model_deployment.ModelDeployment#

Create a new ModelDeployment (version 1).

Args: config: Reference to the ModelDeploymentConfig name

name: Name of the deployment

config_version: Reference to a specific ModelDeploymentConfig version. If not specified, the latest version is used.

hf_token: Hugging Face authentication token for accessing private models and repositories.

namespace: The namespace of the deployment

project: The URN of the project associated with this deployment

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

async delete(
deployment_name: str,
*,
namespace: str,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) object#

Delete all versions of a ModelDeployment.

If the deployment is in any state other than DELETED, this will set its status to DELETING. The models controller will then:

  1. Delete the infrastructure (e.g., K8s NimService)

  2. Update the status to DELETED

If the deployment is already in DELETED status, calling delete again will permanently remove it from the database.

Returns:

  • 202 Accepted: Deployment marked for deletion (status set to DELETING)

  • 204 No Content: Deployment permanently removed from database (was already DELETED)

  • 404 Not Found: Deployment doesn’t exist

Args: extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

async list(
*,
all_versions: bool | nemo_microservices._types.Omit = omit,
namespace: Optional[str] | nemo_microservices._types.Omit = omit,
project: Optional[str] | nemo_microservices._types.Omit = omit,
status_filter: Optional[nemo_microservices.types.v2.inference.model_deployment_status.ModelDeploymentStatus] | nemo_microservices._types.Omit = omit,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.deployment_list_response.DeploymentListResponse#

List all ModelDeployments with optional filtering.

Args: namespace: Optional namespace filter

project: Optional project filter

status_filter: Optional status filter

all_versions: If False (default), return only the latest version of each deployment. If True, return all versions matching the filters.

Returns: List of ModelDeployment objects matching the filters

Args: status_filter: Status enum for ModelDeployment objects.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

async list_namespace(
namespace: str,
*,
all_versions: bool | nemo_microservices._types.Omit = omit,
project: Optional[str] | nemo_microservices._types.Omit = omit,
status_filter: Optional[nemo_microservices.types.v2.inference.model_deployment_status.ModelDeploymentStatus] | nemo_microservices._types.Omit = omit,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.deployment_list_namespace_response.DeploymentListNamespaceResponse#

List ModelDeployments for a specific namespace.

Args: namespace: Namespace to filter by

project: Optional project filter

status_filter: Optional status filter

all_versions: If False (default), return only the latest version of each deployment. If True, return all versions matching the filters.

Returns: List of ModelDeployment objects matching the filters

Args: status_filter: Status enum for ModelDeployment objects.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

property models: nemo_microservices.resources.v2.inference.deployments.models.AsyncModelsResource#
async retrieve(
deployment_name: str,
*,
namespace: str,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.model_deployment.ModelDeployment#

Get the latest version of a ModelDeployment.

Args: extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

property status: nemo_microservices.resources.v2.inference.deployments.status.AsyncStatusResource#
async update(
deployment_name: str,
*,
namespace: str,
config: str,
config_version: int | nemo_microservices._types.Omit = omit,
hf_token: str | nemo_microservices._types.Omit = omit,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.model_deployment.ModelDeployment#

Update a ModelDeployment (creates a new immutable version).

Args: config: Reference to the ModelDeploymentConfig name

config_version: Reference to a specific ModelDeploymentConfig version. If not specified, the latest version is used.

hf_token: Hugging Face authentication token for accessing private models and repositories.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

property versions: nemo_microservices.resources.v2.inference.deployments.versions.AsyncVersionsResource#
property with_raw_response: nemo_microservices.resources.v2.inference.deployments.deployments.AsyncDeploymentsResourceWithRawResponse#

This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content.

For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#accessing-raw-response-data-e-g-headers

property with_streaming_response: nemo_microservices.resources.v2.inference.deployments.deployments.AsyncDeploymentsResourceWithStreamingResponse#

An alternative to .with_raw_response that doesn’t eagerly read the response body.

For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#with_streaming_response

class nemo_microservices.resources.v2.inference.deployments.deployments.AsyncDeploymentsResourceWithRawResponse(
deployments: nemo_microservices.resources.v2.inference.deployments.deployments.AsyncDeploymentsResource,
)#

Initialization

property models: nemo_microservices.resources.v2.inference.deployments.models.AsyncModelsResourceWithRawResponse#
property status: nemo_microservices.resources.v2.inference.deployments.status.AsyncStatusResourceWithRawResponse#
property versions: nemo_microservices.resources.v2.inference.deployments.versions.AsyncVersionsResourceWithRawResponse#
class nemo_microservices.resources.v2.inference.deployments.deployments.AsyncDeploymentsResourceWithStreamingResponse(
deployments: nemo_microservices.resources.v2.inference.deployments.deployments.AsyncDeploymentsResource,
)#

Initialization

property models: nemo_microservices.resources.v2.inference.deployments.models.AsyncModelsResourceWithStreamingResponse#
property status: nemo_microservices.resources.v2.inference.deployments.status.AsyncStatusResourceWithStreamingResponse#
property versions: nemo_microservices.resources.v2.inference.deployments.versions.AsyncVersionsResourceWithStreamingResponse#
class nemo_microservices.resources.v2.inference.deployments.deployments.DeploymentsResource(
client: nemo_microservices._client.NeMoMicroservices,
)#

Bases: nemo_microservices._resource.SyncAPIResource

Initialization

create(
*,
config: str,
name: str,
config_version: int | nemo_microservices._types.Omit = omit,
hf_token: str | nemo_microservices._types.Omit = omit,
namespace: str | nemo_microservices._types.Omit = omit,
project: str | nemo_microservices._types.Omit = omit,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.model_deployment.ModelDeployment#

Create a new ModelDeployment (version 1).

Args: config: Reference to the ModelDeploymentConfig name

name: Name of the deployment

config_version: Reference to a specific ModelDeploymentConfig version. If not specified, the latest version is used.

hf_token: Hugging Face authentication token for accessing private models and repositories.

namespace: The namespace of the deployment

project: The URN of the project associated with this deployment

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

delete(
deployment_name: str,
*,
namespace: str,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) object#

Delete all versions of a ModelDeployment.

If the deployment is in any state other than DELETED, this will set its status to DELETING. The models controller will then:

  1. Delete the infrastructure (e.g., K8s NimService)

  2. Update the status to DELETED

If the deployment is already in DELETED status, calling delete again will permanently remove it from the database.

Returns:

  • 202 Accepted: Deployment marked for deletion (status set to DELETING)

  • 204 No Content: Deployment permanently removed from database (was already DELETED)

  • 404 Not Found: Deployment doesn’t exist

Args: extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

list(
*,
all_versions: bool | nemo_microservices._types.Omit = omit,
namespace: Optional[str] | nemo_microservices._types.Omit = omit,
project: Optional[str] | nemo_microservices._types.Omit = omit,
status_filter: Optional[nemo_microservices.types.v2.inference.model_deployment_status.ModelDeploymentStatus] | nemo_microservices._types.Omit = omit,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.deployment_list_response.DeploymentListResponse#

List all ModelDeployments with optional filtering.

Args: namespace: Optional namespace filter

project: Optional project filter

status_filter: Optional status filter

all_versions: If False (default), return only the latest version of each deployment. If True, return all versions matching the filters.

Returns: List of ModelDeployment objects matching the filters

Args: status_filter: Status enum for ModelDeployment objects.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

list_namespace(
namespace: str,
*,
all_versions: bool | nemo_microservices._types.Omit = omit,
project: Optional[str] | nemo_microservices._types.Omit = omit,
status_filter: Optional[nemo_microservices.types.v2.inference.model_deployment_status.ModelDeploymentStatus] | nemo_microservices._types.Omit = omit,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.deployment_list_namespace_response.DeploymentListNamespaceResponse#

List ModelDeployments for a specific namespace.

Args: namespace: Namespace to filter by

project: Optional project filter

status_filter: Optional status filter

all_versions: If False (default), return only the latest version of each deployment. If True, return all versions matching the filters.

Returns: List of ModelDeployment objects matching the filters

Args: status_filter: Status enum for ModelDeployment objects.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

property models: nemo_microservices.resources.v2.inference.deployments.models.ModelsResource#
retrieve(
deployment_name: str,
*,
namespace: str,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.model_deployment.ModelDeployment#

Get the latest version of a ModelDeployment.

Args: extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

property status: nemo_microservices.resources.v2.inference.deployments.status.StatusResource#
update(
deployment_name: str,
*,
namespace: str,
config: str,
config_version: int | nemo_microservices._types.Omit = omit,
hf_token: str | nemo_microservices._types.Omit = omit,
extra_headers: nemo_microservices._types.Headers | None = None,
extra_query: nemo_microservices._types.Query | None = None,
extra_body: nemo_microservices._types.Body | None = None,
timeout: float | httpx.Timeout | None | nemo_microservices._types.NotGiven = not_given,
) nemo_microservices.types.v2.inference.model_deployment.ModelDeployment#

Update a ModelDeployment (creates a new immutable version).

Args: config: Reference to the ModelDeploymentConfig name

config_version: Reference to a specific ModelDeploymentConfig version. If not specified, the latest version is used.

hf_token: Hugging Face authentication token for accessing private models and repositories.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds

property versions: nemo_microservices.resources.v2.inference.deployments.versions.VersionsResource#
property with_raw_response: nemo_microservices.resources.v2.inference.deployments.deployments.DeploymentsResourceWithRawResponse#

This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content.

For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#accessing-raw-response-data-e-g-headers

property with_streaming_response: nemo_microservices.resources.v2.inference.deployments.deployments.DeploymentsResourceWithStreamingResponse#

An alternative to .with_raw_response that doesn’t eagerly read the response body.

For more information, see https://docs.nvidia.com/nemo/microservices/latest/pysdk/index.html#with_streaming_response

class nemo_microservices.resources.v2.inference.deployments.deployments.DeploymentsResourceWithRawResponse(
deployments: nemo_microservices.resources.v2.inference.deployments.deployments.DeploymentsResource,
)#

Initialization

property models: nemo_microservices.resources.v2.inference.deployments.models.ModelsResourceWithRawResponse#
property status: nemo_microservices.resources.v2.inference.deployments.status.StatusResourceWithRawResponse#
property versions: nemo_microservices.resources.v2.inference.deployments.versions.VersionsResourceWithRawResponse#
class nemo_microservices.resources.v2.inference.deployments.deployments.DeploymentsResourceWithStreamingResponse(
deployments: nemo_microservices.resources.v2.inference.deployments.deployments.DeploymentsResource,
)#

Initialization

property models: nemo_microservices.resources.v2.inference.deployments.models.ModelsResourceWithStreamingResponse#
property status: nemo_microservices.resources.v2.inference.deployments.status.StatusResourceWithStreamingResponse#
property versions: nemo_microservices.resources.v2.inference.deployments.versions.VersionsResourceWithStreamingResponse#