NVIDIA DeepStream SDK API Reference
6.4 Release
tritonserver.h
#ifdef _COMPILING_TRITONSERVER
#if defined(_MSC_VER)
#define TRITONSERVER_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONSERVER_DECLSPEC
#endif
#else
#if defined(_MSC_VER)
#define TRITONSERVER_DECLSPEC __declspec(dllimport)
#else
#define TRITONSERVER_DECLSPEC
#endif
#endif
struct TRITONSERVER_BufferAttributes;
struct TRITONSERVER_Error;
struct TRITONSERVER_InferenceRequest;
struct TRITONSERVER_InferenceResponse;
struct TRITONSERVER_InferenceTrace;
struct TRITONSERVER_Message;
struct TRITONSERVER_Metrics;
struct TRITONSERVER_Parameter;
struct TRITONSERVER_ResponseAllocator;
struct TRITONSERVER_Server;
struct TRITONSERVER_ServerOptions;
struct TRITONSERVER_Metric;
struct TRITONSERVER_MetricFamily;
#define TRITONSERVER_API_VERSION_MAJOR 1
#define TRITONSERVER_API_VERSION_MINOR 17

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ApiVersion(
    uint32_t* major, uint32_t* minor);
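A caller can check at runtime that the linked Triton shared library is compatible with the headers it was compiled against. A minimal sketch, using only TRITONSERVER_ApiVersion and the version macros above:

uint32_t api_major = 0, api_minor = 0;
TRITONSERVER_Error* err = TRITONSERVER_ApiVersion(&api_major, &api_minor);
if ((err == NULL) &&
    ((TRITONSERVER_API_VERSION_MAJOR != api_major) ||
     (TRITONSERVER_API_VERSION_MINOR > api_minor))) {
  /* the shared library is older than these headers require */
}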
(The remaining declarations in the source listing are abridged in this view; the complete signature and a brief description of each file member appear in the entries below.)
@ TRITONSERVER_TYPE_INT64
tritonserver_traceactivity_enum
Trace activities.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(TRITONSERVER_ServerOptions *options, double cc)
Set the minimum supported CUDA compute capability in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetRepoAgentDirectory(TRITONSERVER_ServerOptions *options, const char *repoagent_dir)
Set the directory containing repository agent shared libraries.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_InferenceTraceLevelString(TRITONSERVER_InferenceTraceLevel level)
Get the string representation of a trace level.
void(* TRITONSERVER_InferenceTraceTensorActivityFn_t)(TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, const char *name, TRITONSERVER_DataType datatype, const void *base, size_t byte_size, const int64_t *shape, uint64_t dim_count, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp)
Type for trace tensor activity callback function.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetExitOnError(TRITONSERVER_ServerOptions *options, bool exit)
Enable or disable exit-on-error in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestId(TRITONSERVER_InferenceRequest *inference_request, const char **id)
Get the ID for a request.
enum tritonserver_tracelevel_enum TRITONSERVER_InferenceTraceLevel
TRITONSERVER_InferenceTrace.
@ TRITONSERVER_TRACE_LEVEL_DISABLED
Tracing disabled. No trace activities are reported.
TRITONSERVER_logformat_enum
Format of logging.
tritonserver_responsecompleteflag_enum
Inference response complete flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorSetQueryFunction(TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorQueryFn_t query_fn)
Set the query function to a response allocator object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_LogMessage(TRITONSERVER_LogLevel level, const char *filename, const int line, const char *msg)
Log a message at a given log level if that level is enabled.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseId(TRITONSERVER_InferenceResponse *inference_response, const char **request_id)
Get the ID of the request corresponding to a response.
@ TRITONSERVER_MEMORY_CPU
@ TRITONSERVER_PARAMETER_BYTES
@ TRITONSERVER_LOG_ISO8601
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetrics(TRITONSERVER_ServerOptions *options, bool metrics)
Enable or disable metrics collection in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelIndex(TRITONSERVER_Server *server, uint32_t flags, TRITONSERVER_Message **model_index)
Get the index of all unique models in the model repositories as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseError(TRITONSERVER_InferenceResponse *inference_response)
Return the error status of an inference response.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricsFormatted(TRITONSERVER_Metrics *metrics, TRITONSERVER_MetricFormat format, const char **base, size_t *byte_size)
Get a buffer containing the metrics in the specified format.
@ TRITONSERVER_LOG_DEFAULT
@ TRITONSERVER_TYPE_UINT16
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddInput(TRITONSERVER_InferenceRequest *inference_request, const char *name, const TRITONSERVER_DataType datatype, const int64_t *shape, uint64_t dim_count)
Add an input to a request.
void(* TRITONSERVER_InferenceTraceReleaseFn_t)(TRITONSERVER_InferenceTrace *trace, void *userp)
Type for trace release callback function.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceDelete(TRITONSERVER_InferenceTrace *trace)
Delete a trace object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetFlags(TRITONSERVER_InferenceRequest *inference_request, uint32_t flags)
Set the flag(s) associated with a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerNew(TRITONSERVER_Server **server, TRITONSERVER_ServerOptions *options)
Create a new server object.
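A minimal start-up/shutdown sequence might look like the sketch below; the repository path "/models" is a placeholder and error handling is collapsed into a single err check.

#include <stdbool.h>
#include "tritonserver.h"

static void StartAndStopServer(void)
{
  TRITONSERVER_ServerOptions* options = NULL;
  TRITONSERVER_Error* err = TRITONSERVER_ServerOptionsNew(&options);
  if (err == NULL) {
    /* "/models" is a hypothetical repository path */
    err = TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "/models");
  }
  TRITONSERVER_Server* server = NULL;
  if (err == NULL) {
    err = TRITONSERVER_ServerNew(&server, options);
  }
  /* the options object is no longer needed once the server exists */
  TRITONSERVER_ServerOptionsDelete(options);

  bool live = false;
  if (err == NULL) {
    err = TRITONSERVER_ServerIsLive(server, &live);
  }
  /* ... create and submit inference requests ... */

  if (server != NULL) {
    TRITONSERVER_ServerStop(server);   /* stop accepting new requests */
    TRITONSERVER_ServerDelete(server); /* then release the server object */
  }
  if (err != NULL) {
    TRITONSERVER_ErrorDelete(err);
  }
}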
@ TRITONSERVER_TYPE_INT16
@ TRITONSERVER_TRACE_QUEUE_START
@ TRITONSERVER_TYPE_UINT64
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelBatchProperties(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *flags, void **voidp)
Get the batch properties of the model.
TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete(TRITONSERVER_Error *error)
Delete an error object.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_ErrorMessage(TRITONSERVER_Error *error)
Get the error message.
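Most functions in this API return a TRITONSERVER_Error* that is NULL on success. A common checking idiom, sketched here around an arbitrary call on an existing server handle (assumes <stdio.h>), prints the code string and message and then deletes the error, since ownership of the error passes to the caller:

TRITONSERVER_Error* err = TRITONSERVER_ServerPollModelRepository(server);
if (err != NULL) {
  fprintf(stderr, "error: %s - %s\n",
      TRITONSERVER_ErrorCodeString(err),
      TRITONSERVER_ErrorMessage(err));
  TRITONSERVER_ErrorDelete(err);  /* the caller owns the returned error */
}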
TRITONSERVER_metrickind_enum
TRITONSERVER_MetricKind.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnloadModelAndDependents(TRITONSERVER_Server *server, const char *model_name)
Unload the requested model, and also unload any dependent model that was loaded along with the requested model.
enum tritonserver_modelcontrolmode_enum TRITONSERVER_ModelControlMode
TRITONSERVER_ServerOptions.
@ TRITONSERVER_RATE_LIMIT_OFF
@ TRITONSERVER_ERROR_NOT_FOUND
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageSerializeToJson(TRITONSERVER_Message *message, const char **base, size_t *byte_size)
Get the base and size of the buffer containing the serialized message in JSON format.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveRequestedOutput(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Remove an output request from an inference request.
@ TRITONSERVER_TRACE_COMPUTE_END
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceId(TRITONSERVER_InferenceTrace *trace, uint64_t *id)
Get the id associated with a trace.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetServerId(TRITONSERVER_ServerOptions *options, const char *server_id)
Set the textual ID for the server in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetCudaIpcHandle(TRITONSERVER_BufferAttributes *buffer_attributes, void *cuda_ipc_handle)
Set the CudaIpcHandle field of the buffer attributes.
@ TRITONSERVER_LOG_VERBOSE
enum TRITONSERVER_memorytype_enum TRITONSERVER_MemoryType
TRITONSERVER_MemoryType.
@ TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT
TRITONSERVER_DECLSPEC uint32_t TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype)
Get the size of a Triton datatype in bytes.
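The datatype helpers convert between the enum and string forms and report the per-element size; for example (results shown in comments):

TRITONSERVER_DataType dt = TRITONSERVER_StringToDataType("INT32");
const char* name = TRITONSERVER_DataTypeString(dt);  /* "INT32" */
uint32_t size = TRITONSERVER_DataTypeByteSize(dt);   /* 4 */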
@ TRITONSERVER_BATCH_UNKNOWN
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogVerbose(TRITONSERVER_ServerOptions *options, int level)
Set verbose logging level.
@ TRITONSERVER_METRIC_PROMETHEUS
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStartupModel(TRITONSERVER_ServerOptions *options, const char *model_name)
Set the model to be loaded at startup in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogInfo(TRITONSERVER_ServerOptions *options, bool log)
Enable or disable info level logging.
@ TRITONSERVER_REQUEST_FLAG_SEQUENCE_END
void(* TRITONSERVER_InferenceRequestReleaseFn_t)(TRITONSERVER_InferenceRequest *request, const uint32_t flags, void *userp)
Type for inference request release callback function.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetTimeoutMicroseconds(TRITONSERVER_InferenceRequest *inference_request, uint64_t timeout_us)
Set the timeout for a request, in microseconds.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorAllocFn_t)(TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp, void **buffer, void **buffer_userp, TRITONSERVER_MemoryType *actual_memory_type, int64_t *actual_memory_type_id)
TRITONSERVER_ResponseAllocator.
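A minimal CPU-only allocator sketch, assuming plain malloc/free is acceptable to the application; a production allocator should honor the preferred memory_type rather than always answering with CPU memory.

#include <stdlib.h>

static TRITONSERVER_Error* ResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, void* userp, void** buffer, void** buffer_userp,
    TRITONSERVER_MemoryType* actual_memory_type, int64_t* actual_memory_type_id)
{
  *buffer = (byte_size == 0) ? NULL : malloc(byte_size);
  *buffer_userp = NULL;
  *actual_memory_type = TRITONSERVER_MEMORY_CPU; /* always CPU in this sketch */
  *actual_memory_type_id = 0;
  return NULL; /* success */
}

static TRITONSERVER_Error* ResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size,
    TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
{
  free(buffer);
  return NULL;
}

static TRITONSERVER_Error* CreateAllocator(
    TRITONSERVER_ResponseAllocator** allocator)
{
  /* the start function is optional and may be NULL */
  return TRITONSERVER_ResponseAllocatorNew(
      allocator, ResponseAlloc, ResponseRelease, /* start_fn = */ NULL);
}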
TRITONSERVER_DECLSPEC const char * TRITONSERVER_ErrorCodeString(TRITONSERVER_Error *error)
Get the string representation of an error code.
@ TRITONSERVER_MODEL_CONTROL_POLL
@ TRITONSERVER_TRACE_COMPUTE_INPUT_END
@ TRITONSERVER_TXN_DECOUPLED
@ TRITONSERVER_ERROR_UNKNOWN
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestPriority(TRITONSERVER_InferenceRequest *inference_request, uint32_t *priority)
Get the priority for a request.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_InferenceTraceActivityString(TRITONSERVER_InferenceTraceActivity activity)
Get the string representation of a trace activity.
enum TRITONSERVER_metrickind_enum TRITONSERVER_MetricKind
TRITONSERVER_MetricKind.
@ TRITONSERVER_TRACE_LEVEL_TIMESTAMPS
Record timestamps for the inference request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageDelete(TRITONSERVER_Message *message)
Delete a message object.
@ TRITONSERVER_PARAMETER_INT
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelRepositoryPath(TRITONSERVER_ServerOptions *options, const char *model_repository_path)
Set the model repository path in a server options.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_MemoryTypeString(TRITONSERVER_MemoryType memtype)
Get the string representation of a memory type.
@ TRITONSERVER_ERROR_ALREADY_EXISTS
tritonserver_modelindexflag_enum
Model index flags. The enum values must be power-of-2 values.
TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(TRITONSERVER_Error *error)
Get the error code.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(TRITONSERVER_ServerOptions *options, uint64_t size)
Set the total pinned memory byte size that the server can allocate in a server options.
@ TRITONSERVER_REQUEST_FLAG_SEQUENCE_START
#define TRITONSERVER_DECLSPEC
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn)
Set the buffer attributes function for a response allocator object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBackendConfig(TRITONSERVER_ServerOptions *options, const char *backend_name, const char *setting, const char *value)
Set a configuration setting for a named backend in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetCorrelationIdString(TRITONSERVER_InferenceRequest *inference_request, const char *correlation_id)
Set the correlation ID of the inference request to be a string.
@ TRITONSERVER_TRACE_LEVEL_TENSORS
Record input and output tensor values for the inference request.
enum tritonserver_requestreleaseflag_enum TRITONSERVER_RequestReleaseFlag
Inference request release flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricsDelete(TRITONSERVER_Metrics *metrics)
Delete a metrics object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricValue(TRITONSERVER_Metric *metric, double *value)
Get the current value of a metric object.
@ TRITONSERVER_METRIC_KIND_GAUGE
void(* TRITONSERVER_InferenceResponseCompleteFn_t)(TRITONSERVER_InferenceResponse *response, const uint32_t flags, void *userp)
Type for callback function indicating that an inference response has completed.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorQueryFn_t)(TRITONSERVER_ResponseAllocator *allocator, void *userp, const char *tensor_name, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id)
Type for function that is called to query the allocator's preferred memory type and memory type ID.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetPriority(TRITONSERVER_InferenceRequest *inference_request, uint32_t priority)
Set the priority for a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutputClassificationLabel(TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const size_t class_index, const char **label)
Get a classification label associated with an output for a given index.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadThreadCount(TRITONSERVER_ServerOptions *options, unsigned int thread_count)
Set the number of threads to concurrently load models in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesNew(TRITONSERVER_BufferAttributes **buffer_attributes)
TRITONSERVER_BufferAttributes.
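Buffer attributes bundle the memory type, memory type ID, and byte size (and, for GPU buffers, a CUDA IPC handle) that describe a buffer. A small illustrative round trip:

TRITONSERVER_BufferAttributes* attrs = NULL;
TRITONSERVER_Error* err = TRITONSERVER_BufferAttributesNew(&attrs);
if (err == NULL)
  err = TRITONSERVER_BufferAttributesSetMemoryType(attrs, TRITONSERVER_MEMORY_CPU);
if (err == NULL)
  err = TRITONSERVER_BufferAttributesSetMemoryTypeId(attrs, 0);
if (err == NULL)
  err = TRITONSERVER_BufferAttributesSetByteSize(attrs, 16 * sizeof(float));

size_t byte_size = 0;
if (err == NULL)
  err = TRITONSERVER_BufferAttributesByteSize(attrs, &byte_size); /* 64 */
TRITONSERVER_BufferAttributesDelete(attrs);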
@ TRITONSERVER_TYPE_BYTES
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllInputs(TRITONSERVER_InferenceRequest *inference_request)
Remove all inputs from a request.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorReleaseFn_t)(TRITONSERVER_ResponseAllocator *allocator, void *buffer, void *buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
Type for function that is called when the server no longer holds any reference to a buffer allocated by TRITONSERVER_ResponseAllocatorAllocFn_t.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelIsReady(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, bool *ready)
Is the model ready?
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerIsReady(TRITONSERVER_Server *server, bool *ready)
Is the server ready?
@ TRITONSERVER_INSTANCEGROUPKIND_MODEL
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerRegisterModelRepository(TRITONSERVER_Server *server, const char *repository_path, const TRITONSERVER_Parameter **name_mapping, const uint32_t mapping_count)
Register a new model repository.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogWarn(TRITONSERVER_ServerOptions *options, bool log)
Enable or disable warning level logging.
enum TRITONSERVER_errorcode_enum TRITONSERVER_Error_Code
TRITONSERVER_Error.
TRITONSERVER_DECLSPEC TRITONSERVER_DataType TRITONSERVER_StringToDataType(const char *dtype)
Get the Triton datatype corresponding to a string representation of a datatype.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricNew(TRITONSERVER_Metric **metric, TRITONSERVER_MetricFamily *family, const TRITONSERVER_Parameter **labels, const uint64_t label_count)
Create a new metric object.
enum TRITONSERVER_logformat_enum TRITONSERVER_LogFormat
Format of logging.
@ TRITONSERVER_REQUEST_RELEASE_ALL
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerStop(TRITONSERVER_Server *server)
Stop a server object.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_DataTypeString(TRITONSERVER_DataType datatype)
Get the string representation of a data type.
void(* TRITONSERVER_InferenceTraceActivityFn_t)(TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, void *userp)
Type for trace timeline activity callback function.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetricsInterval(TRITONSERVER_ServerOptions *options, uint64_t metrics_interval_ms)
Set the interval for metrics collection in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllInputData(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Clear all input data from an input, releasing ownership of the buffer(s) that were appended to the input.
@ TRITONSERVER_METRIC_KIND_COUNTER
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelControlMode(TRITONSERVER_ServerOptions *options, TRITONSERVER_ModelControlMode mode)
Set the model control mode in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStrictModelConfig(TRITONSERVER_ServerOptions *options, bool strict)
Enable or disable strict model configuration handling in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricDelete(TRITONSERVER_Metric *metric)
Delete a metric object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, const char *host_policy_name)
Assign a buffer of data to an input for execution on all model instances with the specified host policy.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetReleaseCallback(TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, void *request_release_userp)
Set the release callback for an inference request.
@ TRITONSERVER_TXN_ONE_TO_ONE
TRITONSERVER_errorcode_enum
TRITONSERVER_Error.
@ TRITONSERVER_RATE_LIMIT_EXEC_COUNT
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerInferAsync(TRITONSERVER_Server *server, TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_InferenceTrace *trace)
Perform inference using the meta-data and inputs supplied by the 'inference_request'.
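Because inference is asynchronous, the request's release callback and the response-complete callback drive the object lifetimes. A sketch of both callbacks and the submission sequence (request and allocator construction as shown elsewhere on this page; the userp arguments are application-defined and simply NULL here):

static void InferRequestRelease(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  if (flags & TRITONSERVER_REQUEST_RELEASE_ALL) {
    /* Triton no longer holds the request; safe to delete (or reuse) */
    TRITONSERVER_InferenceRequestDelete(request);
  }
}

static void InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  if (response != NULL) {
    /* inspect outputs here, then release the response */
    TRITONSERVER_InferenceResponseDelete(response);
  }
  /* flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL marks the last response */
}

/* Submission, given an existing server, request, and allocator:
   TRITONSERVER_InferenceRequestSetReleaseCallback(
       request, InferRequestRelease, NULL);
   TRITONSERVER_InferenceRequestSetResponseCallback(
       request, allocator, NULL, InferResponseComplete, NULL);
   TRITONSERVER_ServerInferAsync(server, request, NULL);  (no trace)
*/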
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageNewFromSerializedJson(TRITONSERVER_Message **message, const char *base, size_t byte_size)
TRITONSERVER_Message.
@ TRITONSERVER_INDEX_FLAG_READY
enum tritonserver_txn_property_flag_enum TRITONSERVER_ModelTxnPropertyFlag
Model transaction policy flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerLoadModel(TRITONSERVER_Server *server, const char *model_name)
Load the requested model or reload the model if it is already loaded.
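Explicit load/unload calls require that the server options were created with TRITONSERVER_ServerOptionsSetModelControlMode set to TRITONSERVER_MODEL_CONTROL_EXPLICIT; the model name below is hypothetical.

TRITONSERVER_Error* err = TRITONSERVER_ServerLoadModel(server, "densenet_onnx");
/* ... run inference against the model ... */
if (err == NULL)
  err = TRITONSERVER_ServerUnloadModel(server, "densenet_onnx");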
TRITONSERVER_DECLSPEC const char * TRITONSERVER_ParameterTypeString(TRITONSERVER_ParameterType paramtype)
Get the string representation of a parameter type.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricIncrement(TRITONSERVER_Metric *metric, double value)
Increment the current value of metric by value.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutput(TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_DataType *datatype, const int64_t **shape, uint64_t *dim_count, const void **base, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id, void **userp)
Get all information about an output tensor.
TRITONSERVER_memorytype_enum
TRITONSERVER_MemoryType.
@ TRITONSERVER_TRACE_REQUEST_END
enum tritonserver_requestflag_enum TRITONSERVER_RequestFlag
TRITONSERVER_InferenceRequest.
enum tritonserver_traceactivity_enum TRITONSERVER_InferenceTraceActivity
Trace activities.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestFlags(TRITONSERVER_InferenceRequest *inference_request, uint32_t *flags)
Get the flag(s) associated with a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerDelete(TRITONSERVER_Server *server)
Delete a server object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnregisterModelRepository(TRITONSERVER_Server *server, const char *repository_path)
Unregister a model repository.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetHostPolicy(TRITONSERVER_ServerOptions *options, const char *policy_name, const char *setting, const char *value)
Set a host policy setting for a given policy name in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricSet(TRITONSERVER_Metric *metric, double value)
Set the current value of metric to value.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesDelete(TRITONSERVER_BufferAttributes *buffer_attributes)
Delete a buffer attributes object.
@ TRITONSERVER_ERROR_INTERNAL
@ TRITONSERVER_MODEL_CONTROL_NONE
@ TRITONSERVER_MODEL_CONTROL_EXPLICIT
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricFamilyNew(TRITONSERVER_MetricFamily **family, const TRITONSERVER_MetricKind kind, const char *name, const char *description)
Create a new metric family object.
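A custom metric is created from a family plus a set of string-valued label parameters. This sketch (the family and label names are illustrative, and it assumes label parameters are copied during TRITONSERVER_MetricNew) builds a counter, increments it, and tears everything down in order, metrics before their family:

TRITONSERVER_MetricFamily* family = NULL;
TRITONSERVER_Error* err = TRITONSERVER_MetricFamilyNew(
    &family, TRITONSERVER_METRIC_KIND_COUNTER, "example_requests_total",
    "Example request counter");

TRITONSERVER_Parameter* label = TRITONSERVER_ParameterNew(
    "model", TRITONSERVER_PARAMETER_STRING, "example_model");
const TRITONSERVER_Parameter* labels[1] = {label};

TRITONSERVER_Metric* metric = NULL;
if (err == NULL) err = TRITONSERVER_MetricNew(&metric, family, labels, 1);
TRITONSERVER_ParameterDelete(label); /* assumed copied by MetricNew */

if (err == NULL) err = TRITONSERVER_MetricIncrement(metric, 1.0);

TRITONSERVER_MetricDelete(metric);      /* delete metrics first */
TRITONSERVER_MetricFamilyDelete(family); /* then the family */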
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBufferManagerThreadCount(TRITONSERVER_ServerOptions *options, unsigned int thread_count)
Set the number of threads used by the buffer manager in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerMetadata(TRITONSERVER_Server *server, TRITONSERVER_Message **server_metadata)
Get the metadata of the server as a TRITONSERVER_Message object.
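Server metadata, like model metadata, statistics, and configuration, is returned as a TRITONSERVER_Message that can be serialized to JSON. The returned buffer is owned by the message and is not guaranteed to be null-terminated, so print it length-bounded (sketch assumes <stdio.h>):

TRITONSERVER_Message* metadata = NULL;
TRITONSERVER_Error* err = TRITONSERVER_ServerMetadata(server, &metadata);
if (err == NULL) {
  const char* base = NULL;
  size_t byte_size = 0;
  err = TRITONSERVER_MessageSerializeToJson(metadata, &base, &byte_size);
  if (err == NULL) {
    printf("%.*s\n", (int)byte_size, base); /* length-bounded print */
  }
  TRITONSERVER_MessageDelete(metadata);
}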
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestCorrelationId(TRITONSERVER_InferenceRequest *inference_request, uint64_t *correlation_id)
Get the correlation ID of the inference request as an unsigned integer.
enum TRITONSERVER_parametertype_enum TRITONSERVER_ParameterType
TRITONSERVER_ParameterType.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs(TRITONSERVER_InferenceRequest *inference_request)
Remove all output requests from an inference request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes(TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, TRITONSERVER_BufferAttributes *buffer_attributes)
Assign a buffer of data to an input.
tritonserver_tracelevel_enum
TRITONSERVER_InferenceTrace.
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter * TRITONSERVER_ParameterBytesNew(const char *name, const void *byte_ptr, const uint64_t size)
Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES.
TRITONSERVER_parametertype_enum
TRITONSERVER_ParameterType.
@ TRITONSERVER_TYPE_INVALID
@ TRITONSERVER_PARAMETER_STRING
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerLoadModelWithParameters(TRITONSERVER_Server *server, const char *model_name, const TRITONSERVER_Parameter **parameters, const uint64_t parameter_count)
Load the requested model or reload the model if it is already loaded, with load parameters provided.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerMetrics(TRITONSERVER_Server *server, TRITONSERVER_Metrics **metrics)
Get the current metrics for the server.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelTransactionProperties(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *txn_flags, void **voidp)
Get the transaction policy of the model.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetRateLimiterMode(TRITONSERVER_ServerOptions *options, TRITONSERVER_RateLimitMode mode)
Set the rate limit mode in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceModelVersion(TRITONSERVER_InferenceTrace *trace, int64_t *model_version)
Get the version of the model associated with a trace.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetId(TRITONSERVER_InferenceRequest *inference_request, const char *id)
Set the ID for a request.
enum TRITONSERVER_loglevel_enum TRITONSERVER_LogLevel
TRITONSERVER_Logging.
tritonserver_txn_property_flag_enum
Model transaction policy flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStrictReadiness(TRITONSERVER_ServerOptions *options, bool strict)
Enable or disable strict readiness handling in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerIsLive(TRITONSERVER_Server *server, bool *live)
Is the server live?
enum TRITONSERVER_datatype_enum TRITONSERVER_DataType
TRITONSERVER_DataType.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddRawInput(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Add a raw input to a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetGpuMetrics(TRITONSERVER_ServerOptions *options, bool gpu_metrics)
Enable or disable GPU metrics collection in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceModelName(TRITONSERVER_InferenceTrace *trace, const char **model_name)
Get the name of the model associated with a trace.
@ TRITONSERVER_TRACE_COMPUTE_START
tritonserver_batchflag_enum
TRITONSERVER_Server.
@ TRITONSERVER_ERROR_INVALID_ARG
@ TRITONSERVER_MEMORY_CPU_PINNED
enum tritonserver_responsecompleteflag_enum TRITONSERVER_ResponseCompleteFlag
Inference response complete flags.
@ TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT
@ TRITONSERVER_TYPE_UINT8
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter * TRITONSERVER_ParameterNew(const char *name, const TRITONSERVER_ParameterType type, const void *value)
Create a new parameter object.
@ TRITONSERVER_TYPE_INT32
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetMemoryTypeId(TRITONSERVER_BufferAttributes *buffer_attributes, int64_t memory_type_id)
Set the memory type id field of the buffer attributes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogFile(TRITONSERVER_ServerOptions *options, const char *file)
Provide a log output file.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ApiVersion(uint32_t *major, uint32_t *minor)
Get the TRITONSERVER API version supported by the Triton shared library.
@ TRITONSERVER_TRACE_LEVEL_MIN
Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server *server)
Check the model repository for changes and update server state based on those changes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseParameter(TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_ParameterType *type, const void **vvalue)
Get all information about a parameter.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelStatistics(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, TRITONSERVER_Message **model_stats)
Get the statistics of a model as a TRITONSERVER_Message object.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorStartFn_t)(TRITONSERVER_ResponseAllocator *allocator, void *userp)
Type for function that is called to indicate that subsequent allocation requests will refer to a new response.
@ TRITONSERVER_ERROR_UNAVAILABLE
@ TRITONSERVER_INSTANCEGROUPKIND_AUTO
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceTensorNew(TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp)
Create a new inference trace object.
TRITONSERVER_instancegroupkind_enum
TRITONSERVER_InstanceGroupKind.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetExitTimeout(TRITONSERVER_ServerOptions *options, unsigned int timeout)
Set the exit timeout, in seconds, for the server in a server options.
tritonserver_modelcontrolmode_enum
TRITONSERVER_ServerOptions.
@ TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnloadModel(TRITONSERVER_Server *server, const char *model_name)
Unload the requested model.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorBufferAttributesFn_t)(TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, TRITONSERVER_BufferAttributes *buffer_attributes, void *userp, void *buffer_userp)
Type for allocation function that allocates a buffer to hold an output tensor with buffer attributes.
TRITONSERVER_loglevel_enum
TRITONSERVER_Logging.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceParentId(TRITONSERVER_InferenceTrace *trace, uint64_t *parent_id)
Get the parent id associated with a trace.
@ TRITONSERVER_ERROR_UNSUPPORTED
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code code, const char *msg)
Create a new error object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_GetMetricKind(TRITONSERVER_Metric *metric, TRITONSERVER_MetricKind *kind)
Get the TRITONSERVER_MetricKind of metric and its corresponding family.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutputCount(TRITONSERVER_InferenceResponse *inference_response, uint32_t *count)
Get the number of outputs available in the response.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_InstanceGroupKindString(TRITONSERVER_InstanceGroupKind kind)
Get the string representation of an instance-group kind.
enum tritonserver_ratelimitmode_enum TRITONSERVER_RateLimitMode
Rate limit modes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesByteSize(TRITONSERVER_BufferAttributes *buffer_attributes, size_t *byte_size)
Get the byte size field of the buffer attributes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetResponseCacheByteSize(TRITONSERVER_ServerOptions *options, uint64_t size)
Set the total response cache byte size that the server can allocate in CPU memory.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricFamilyDelete(TRITONSERVER_MetricFamily *family)
Delete a metric family object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetMemoryType(TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType memory_type)
Set the memory type field of the buffer attributes.
enum tritonserver_metricformat_enum TRITONSERVER_MetricFormat
TRITONSERVER_Metrics.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesCudaIpcHandle(TRITONSERVER_BufferAttributes *buffer_attributes, void **cuda_ipc_handle)
Get the CudaIpcHandle field of the buffer attributes object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseModel(TRITONSERVER_InferenceResponse *inference_response, const char **model_name, int64_t *model_version)
Get model used to produce a response.
@ TRITONSERVER_TRACE_COMPUTE_OUTPUT_START
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetCorrelationId(TRITONSERVER_InferenceRequest *inference_request, uint64_t correlation_id)
Set the correlation ID of the inference request to be an unsigned integer.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorNew(TRITONSERVER_ResponseAllocator **allocator, TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, TRITONSERVER_ResponseAllocatorStartFn_t start_fn)
Create a new response allocator object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit(TRITONSERVER_ServerOptions *options, const TRITONSERVER_InstanceGroupKind kind, const int device_id, const double fraction)
Specify the limit on memory usage as a fraction on the device identified by 'kind' and 'device_id'.
@ TRITONSERVER_RESPONSE_COMPLETE_FINAL
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputData(TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
Assign a buffer of data to an input.
TRITONSERVER_datatype_enum
TRITONSERVER_DataType.
tritonserver_metricformat_enum
TRITONSERVER_Metrics.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestCorrelationIdString(TRITONSERVER_InferenceRequest *inference_request, const char **correlation_id)
Get the correlation ID of the inference request as a string.
@ TRITONSERVER_INSTANCEGROUPKIND_GPU
enum TRITONSERVER_instancegroupkind_enum TRITONSERVER_InstanceGroupKind
TRITONSERVER_InstanceGroupKind.
TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled(TRITONSERVER_LogLevel level)
Is a log level enabled?
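Checking whether a level is enabled before formatting a message avoids wasted work. A small sketch using the info level (TRITONSERVER_LOG_INFO is assumed from the TRITONSERVER_loglevel_enum):

if (TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_INFO)) {
  TRITONSERVER_Error* err = TRITONSERVER_LogMessage(
      TRITONSERVER_LOG_INFO, __FILE__, __LINE__, "pipeline initialized");
  if (err != NULL) {
    TRITONSERVER_ErrorDelete(err);
  }
}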
tritonserver_requestflag_enum
TRITONSERVER_InferenceRequest.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddRequestedOutput(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Add an output request to an inference request.
@ TRITONSERVER_TYPE_UINT32
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelMetadata(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, TRITONSERVER_Message **model_metadata)
Get the metadata of a model as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogError(TRITONSERVER_ServerOptions *options, bool log)
Enable or disable error level logging.
enum tritonserver_modelindexflag_enum TRITONSERVER_ModelIndexFlag
Model index flags. The enum values must be power-of-2 values.
@ TRITONSERVER_TRACE_LEVEL_MAX
Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetByteSize(TRITONSERVER_BufferAttributes *buffer_attributes, size_t byte_size)
Set the byte size field of the buffer attributes.
@ TRITONSERVER_TRACE_REQUEST_START
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceNew(TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp)
Create a new inference trace object.
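A trace object pairs a timeline-activity callback with a release callback that fires once Triton no longer uses the trace; a minimal sketch (the callback bodies are illustrative):

static void TraceActivity(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
    void* userp)
{
  /* e.g. record TRITONSERVER_InferenceTraceActivityString(activity)
     together with timestamp_ns */
}

static void TraceRelease(TRITONSERVER_InferenceTrace* trace, void* userp)
{
  TRITONSERVER_InferenceTraceDelete(trace); /* safe once released */
}

/* Creation; the trace is then passed to TRITONSERVER_ServerInferAsync:
   TRITONSERVER_InferenceTrace* trace = NULL;
   TRITONSERVER_InferenceTraceNew(
       &trace, TRITONSERVER_TRACE_LEVEL_TIMESTAMPS, 0,
       TraceActivity, TraceRelease, NULL);
*/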
@ TRITONSERVER_PARAMETER_BOOL
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelConfig(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, const uint32_t config_version, TRITONSERVER_Message **model_config)
Get the configuration of a model as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCpuMetrics(TRITONSERVER_ServerOptions *options, bool cpu_metrics)
Enable or disable CPU metrics collection in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBackendDirectory(TRITONSERVER_ServerOptions *options, const char *backend_dir)
Set the directory containing backend shared libraries.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorDelete(TRITONSERVER_ResponseAllocator *allocator)
Delete a response allocator.
TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete(TRITONSERVER_Parameter *parameter)
Delete a parameter object.
enum tritonserver_batchflag_enum TRITONSERVER_ModelBatchFlag
TRITONSERVER_Server.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesMemoryType(TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType *memory_type)
Get the memory type field of the buffer attributes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveInput(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Remove an input from a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize(TRITONSERVER_ServerOptions *options, int gpu_device, uint64_t size)
Set the total CUDA memory byte size that the server can allocate on a given GPU device in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsAddRateLimiterResource(TRITONSERVER_ServerOptions *options, const char *resource_name, const size_t resource_count, const int device)
Add resource count for rate limiting.
@ TRITONSERVER_MEMORY_GPU
tritonserver_requestreleaseflag_enum
Inference request release flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetResponseCallback(TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_ResponseAllocator *response_allocator, void *response_allocator_userp, TRITONSERVER_InferenceResponseCompleteFn_t response_fn, void *response_userp)
Set the allocator and response callback for an inference request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestTimeoutMicroseconds(TRITONSERVER_InferenceRequest *inference_request, uint64_t *timeout_us)
Get the timeout for a request, in microseconds.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseDelete(TRITONSERVER_InferenceResponse *inference_response)
TRITONSERVER_InferenceResponse.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogFormat(TRITONSERVER_ServerOptions *options, const TRITONSERVER_LogFormat format)
Set the logging format.
tritonserver_ratelimitmode_enum
Rate limit modes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestDelete(TRITONSERVER_InferenceRequest *inference_request)
Delete an inference request object.
@ TRITONSERVER_BATCH_FIRST_DIM
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsNew(TRITONSERVER_ServerOptions **options)
Create a new server options object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesMemoryTypeId(TRITONSERVER_BufferAttributes *buffer_attributes, int64_t *memory_type_id)
Get the memory type id field of the buffer attributes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestNew(TRITONSERVER_InferenceRequest **inference_request, TRITONSERVER_Server *server, const char *model_name, const int64_t model_version)
Create a new inference request object.
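Building a request typically follows new, then add input, then append input data, then request outputs. The model name "simple", the tensor names, and the shape below are hypothetical; -1 selects the latest model version.

TRITONSERVER_InferenceRequest* request = NULL;
TRITONSERVER_Error* err = TRITONSERVER_InferenceRequestNew(
    &request, server, "simple", -1 /* latest version */);
if (err == NULL) {
  err = TRITONSERVER_InferenceRequestSetId(request, "request-0");
}

const int64_t shape[2] = {1, 16};
if (err == NULL) {
  err = TRITONSERVER_InferenceRequestAddInput(
      request, "INPUT0", TRITONSERVER_TYPE_INT32, shape, 2);
}

int32_t input_data[16] = {0};
if (err == NULL) {
  err = TRITONSERVER_InferenceRequestAppendInputData(
      request, "INPUT0", input_data, sizeof(input_data),
      TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */);
}

if (err == NULL) {
  err = TRITONSERVER_InferenceRequestAddRequestedOutput(request, "OUTPUT0");
}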
@ TRITONSERVER_INSTANCEGROUPKIND_CPU
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsDelete(TRITONSERVER_ServerOptions *options)
Delete a server options object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseParameterCount(TRITONSERVER_InferenceResponse *inference_response, uint32_t *count)
Get the number of parameters available in the response.