Program Listing for File trtserver.h

Return to documentation for file (src/core/trtserver.h)

// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once


#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// TRTSERVER_EXPORT marks the API declarations in this header as exported
// from the library:
//  * MSVC (_MSC_VER defined): expands to __declspec(dllexport).
//  * GCC-compatible compilers (__GNUC__ defined): expands to the "default"
//    visibility attribute, so the symbol stays visible even when the
//    library is built with hidden default visibility.
//  * Any other compiler: expands to nothing.
// NOTE(review): the MSVC branch is always dllexport, including when this
// header is included by a client of the library -- confirm that is intended.
#if defined(_MSC_VER)
#define TRTSERVER_EXPORT __declspec(dllexport)
#elif defined(__GNUC__)
#define TRTSERVER_EXPORT __attribute__((__visibility__("default")))
#else
#define TRTSERVER_EXPORT
#endif

// When TRTIS_ENABLE_GPU is defined the CUDA runtime API header is included
// (it is expected to supply the real cudaIpcMemHandle_t). Otherwise
// cudaIpcMemHandle_t is aliased to void so that declarations in this header
// taking a `const cudaIpcMemHandle_t*` (TRTSERVER_SharedMemoryBlockGpuNew)
// still compile in builds without CUDA.
#ifdef TRTIS_ENABLE_GPU
#include <cuda_runtime_api.h>
#else
typedef void cudaIpcMemHandle_t;
#endif  // TRTIS_ENABLE_GPU

// Opaque object types of the API. Only the struct tags are declared in this
// header, so callers can hold and pass pointers to these objects but cannot
// access their contents.
//
// Each tag is declared together with a typedef of the same name. The
// prototypes in this header refer to the types without the `struct` keyword
// (e.g. `TRTSERVER_Error*`), which is only valid in C when such a typedef
// exists; the form below is valid in both C and C++ and names exactly the
// same types as the plain `struct X;` forward declarations did.
typedef struct TRTSERVER_Error TRTSERVER_Error;
typedef struct TRTSERVER_InferenceRequestOptions
    TRTSERVER_InferenceRequestOptions;
typedef struct TRTSERVER_InferenceRequestProvider
    TRTSERVER_InferenceRequestProvider;
typedef struct TRTSERVER_InferenceResponse TRTSERVER_InferenceResponse;
typedef struct TRTSERVER_Metrics TRTSERVER_Metrics;
typedef struct TRTSERVER_Protobuf TRTSERVER_Protobuf;
typedef struct TRTSERVER_ResponseAllocator TRTSERVER_ResponseAllocator;
typedef struct TRTSERVER_Server TRTSERVER_Server;
typedef struct TRTSERVER_ServerOptions TRTSERVER_ServerOptions;
typedef struct TRTSERVER_SharedMemoryBlock TRTSERVER_SharedMemoryBlock;
typedef struct TRTSERVER_Trace TRTSERVER_Trace;
typedef struct TRTSERVER_TraceManager TRTSERVER_TraceManager;

// Memory type tag that accompanies buffer pointers throughout this API
// (for example in TRTSERVER_InferenceRequestProviderSetInputData and the
// response allocator callbacks). The enumerators have no explicit values,
// so they are 0 (CPU), 1 (GPU) and 2 (CPU_PINNED) in declaration order;
// reordering them would change those values.
typedef enum trtserver_memorytype_enum {
  TRTSERVER_MEMORY_CPU,
  TRTSERVER_MEMORY_GPU,
  TRTSERVER_MEMORY_CPU_PINNED
} TRTSERVER_Memory_Type;


// Error codes associated with a TRTSERVER_Error (passed to
// TRTSERVER_ErrorNew and returned by TRTSERVER_ErrorCode). The enumerators
// have no explicit values, so they number from 0 (UNKNOWN) to 6
// (ALREADY_EXISTS) in declaration order; inserting or reordering entries
// would change those values.
typedef enum trtserver_errorcode_enum {
  TRTSERVER_ERROR_UNKNOWN,
  TRTSERVER_ERROR_INTERNAL,
  TRTSERVER_ERROR_NOT_FOUND,
  TRTSERVER_ERROR_INVALID_ARG,
  TRTSERVER_ERROR_UNAVAILABLE,
  TRTSERVER_ERROR_UNSUPPORTED,
  TRTSERVER_ERROR_ALREADY_EXISTS
} TRTSERVER_Error_Code;

// TRTSERVER_Error accessors.
// NOTE(review): ownership and lifetime rules below are inferred from the
// New/Delete naming; confirm against the implementation.

// Takes an error code and a C-string message and returns a TRTSERVER_Error
// pointer. Presumably the returned object is released with
// TRTSERVER_ErrorDelete.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ErrorNew(
    TRTSERVER_Error_Code code, const char* msg);

// Takes an error object; returns nothing. Presumably destroys `error`.
TRTSERVER_EXPORT void TRTSERVER_ErrorDelete(TRTSERVER_Error* error);

// Returns the TRTSERVER_Error_Code for `error`.
TRTSERVER_EXPORT TRTSERVER_Error_Code
TRTSERVER_ErrorCode(TRTSERVER_Error* error);

// Returns a C string for `error`; by its name, a textual form of the error
// code. Lifetime of the returned string is not stated here -- TODO confirm.
TRTSERVER_EXPORT const char* TRTSERVER_ErrorCodeString(TRTSERVER_Error* error);

// Returns a C string for `error`; by its name, the error message. Lifetime
// of the returned string is not stated here -- TODO confirm.
TRTSERVER_EXPORT const char* TRTSERVER_ErrorMessage(TRTSERVER_Error* error);


// TRTSERVER_SharedMemoryBlock construction and destruction. Each function
// returns a TRTSERVER_Error pointer.
// NOTE(review): presumably a null return means success and a non-null
// return is an error owned by the caller -- confirm against implementation.

// Out-parameter `shared_memory_block` receives the new block. Inputs are a
// block `name`, a shared-memory key string `shm_key`, and an `offset` and
// `byte_size` (both size_t). Presumably describes a region of CPU (system)
// shared memory, per the function name -- TODO confirm units are bytes.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_SharedMemoryBlockCpuNew(
    TRTSERVER_SharedMemoryBlock** shared_memory_block, const char* name,
    const char* shm_key, const size_t offset, const size_t byte_size);

// Out-parameter `shared_memory_block` receives the new block. Inputs are a
// block `name`, a pointer to a CUDA IPC memory handle, a `byte_size` and a
// `device_id`. When the header is built without TRTIS_ENABLE_GPU the handle
// type is void, so `cuda_shm_handle` is a `const void*` in that build.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_SharedMemoryBlockGpuNew(
    TRTSERVER_SharedMemoryBlock** shared_memory_block, const char* name,
    const cudaIpcMemHandle_t* cuda_shm_handle, const size_t byte_size,
    const int device_id);

// Takes a block created by one of the functions above. Presumably destroys
// the block object -- whether the underlying memory is affected is not
// visible here.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_SharedMemoryBlockDelete(
    TRTSERVER_SharedMemoryBlock* shared_memory_block);

TRTSERVER_Error* TRTSERVER_SharedMemoryBlockMemoryType(
    TRTSERVER_SharedMemoryBlock* shared_memory_block,
    TRTSERVER_Memory_Type* memory_type);

TRTSERVER_Error* TRTSERVER_SharedMemoryBlockMemoryTypeId(
    TRTSERVER_SharedMemoryBlock* shared_memory_block, int64_t* memory_type_id);


// Callback type for allocating a buffer. The callback receives the
// allocator, a `tensor_name`, the requested `byte_size`, a requested
// `memory_type` / `memory_type_id` pair and a user pointer `userp`. It
// reports its result through four out-parameters: `buffer`, `buffer_userp`,
// `actual_memory_type` and `actual_memory_type_id`, and returns a
// TRTSERVER_Error pointer.
// NOTE(review): the separate "actual_*" outputs suggest the callback may
// allocate in a memory type other than the one requested -- confirm.
typedef TRTSERVER_Error* (*TRTSERVER_ResponseAllocatorAllocFn_t)(
    TRTSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRTSERVER_Memory_Type memory_type, int64_t memory_type_id,
    void* userp, void** buffer, void** buffer_userp,
    TRTSERVER_Memory_Type* actual_memory_type, int64_t* actual_memory_type_id);

// Callback type for releasing a buffer. The callback receives the
// allocator, the `buffer` and `buffer_userp` pointers, the `byte_size` and
// the memory type / id of the buffer, and returns a TRTSERVER_Error
// pointer. Presumably `buffer` and `buffer_userp` are the values produced
// by the matching alloc callback -- confirm.
typedef TRTSERVER_Error* (*TRTSERVER_ResponseAllocatorReleaseFn_t)(
    TRTSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRTSERVER_Memory_Type memory_type,
    int64_t memory_type_id);

// Out-parameter `allocator` receives the new response allocator, which is
// built from an alloc callback and a release callback. Returns a
// TRTSERVER_Error pointer (presumably null on success -- confirm).
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ResponseAllocatorNew(
    TRTSERVER_ResponseAllocator** allocator,
    TRTSERVER_ResponseAllocatorAllocFn_t alloc_fn,
    TRTSERVER_ResponseAllocatorReleaseFn_t release_fn);

// Takes a response allocator; returns a TRTSERVER_Error pointer. Presumably
// destroys the allocator object -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ResponseAllocatorDelete(
    TRTSERVER_ResponseAllocator* allocator);


// Takes a TRTSERVER_Protobuf object; returns a TRTSERVER_Error pointer.
// Presumably destroys the object -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ProtobufDelete(
    TRTSERVER_Protobuf* protobuf);

// Out-parameters `base` and `byte_size` receive a pointer to, and the size
// of, a byte sequence for `protobuf`; by the function name, its serialized
// form. Returns a TRTSERVER_Error pointer.
// NOTE(review): who owns the memory behind `base`, and how long it stays
// valid, is not visible in this header -- confirm before caching it.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ProtobufSerialize(
    TRTSERVER_Protobuf* protobuf, const char** base, size_t* byte_size);


// Output formats accepted by TRTSERVER_MetricsFormatted. Only one format is
// declared; it has the implicit value 0.
typedef enum trtserver_metricformat_enum {
  TRTSERVER_METRIC_PROMETHEUS
} TRTSERVER_Metric_Format;

// Takes a TRTSERVER_Metrics object; returns a TRTSERVER_Error pointer.
// Presumably destroys the object -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_MetricsDelete(
    TRTSERVER_Metrics* metrics);

// Out-parameters `base` and `byte_size` receive a pointer to, and the size
// of, the metrics rendered in `format`. Returns a TRTSERVER_Error pointer.
// NOTE(review): ownership and lifetime of the memory behind `base` are not
// visible in this header -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_MetricsFormatted(
    TRTSERVER_Metrics* metrics, TRTSERVER_Metric_Format format,
    const char** base, size_t* byte_size);


// Trace levels accepted by TRTSERVER_TraceNew. The enumerators have no
// explicit values, so they are 0 (DISABLED), 1 (MIN) and 2 (MAX).
// NOTE(review): what MIN and MAX each record is not visible here.
typedef enum trtserver_tracelevel_enum {
  TRTSERVER_TRACE_LEVEL_DISABLED,
  TRTSERVER_TRACE_LEVEL_MIN,
  TRTSERVER_TRACE_LEVEL_MAX
} TRTSERVER_Trace_Level;

// Trace activities. These are the values passed as the `activity` argument
// of a TRTSERVER_TraceActivityFn_t callback. The enumerators have no
// explicit values, so they number from 0 (REQUEST_START) to 6 (REQUEST_END)
// in declaration order.
typedef enum trtserver_traceactivity_enum {
  TRTSERVER_TRACE_REQUEST_START,
  TRTSERVER_TRACE_QUEUE_START,
  TRTSERVER_TRACE_COMPUTE_START,
  TRTSERVER_TRACE_COMPUTE_INPUT_END,
  TRTSERVER_TRACE_COMPUTE_OUTPUT_START,
  TRTSERVER_TRACE_COMPUTE_END,
  TRTSERVER_TRACE_REQUEST_END
} TRTSERVER_Trace_Activity;

// Callback type for reporting a trace activity. The callback receives the
// trace, the activity, a timestamp (named as nanoseconds; the clock it is
// measured against is not visible here) and a user pointer. Returns nothing.
typedef void (*TRTSERVER_TraceActivityFn_t)(
    TRTSERVER_Trace* trace, TRTSERVER_Trace_Activity activity,
    uint64_t timestamp_ns, void* userp);

// Out-parameter `trace` receives the new trace object, which is built from
// a trace level, an activity callback and the user pointer associated with
// that callback. Returns a TRTSERVER_Error pointer (presumably null on
// success -- confirm).
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_TraceNew(
    TRTSERVER_Trace** trace, TRTSERVER_Trace_Level level,
    TRTSERVER_TraceActivityFn_t activity_fn, void* activity_userp);

// Takes a trace object; returns a TRTSERVER_Error pointer. Presumably
// destroys the trace -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_TraceDelete(TRTSERVER_Trace* trace);

// Out-parameter `model_name` receives a C string for `trace`. Lifetime of
// the string is not visible here -- TODO confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_TraceModelName(
    TRTSERVER_Trace* trace, const char** model_name);

// Out-parameter `model_version` receives an int64_t for `trace`.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_TraceModelVersion(
    TRTSERVER_Trace* trace, int64_t* model_version);

// Out-parameter `id` receives an int64_t identifier for `trace`.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_TraceId(
    TRTSERVER_Trace* trace, int64_t* id);

// Out-parameter `parent_id` receives an int64_t for `trace`; by its name,
// the id of a parent trace. The value used when there is no parent is not
// visible here -- TODO confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_TraceParentId(
    TRTSERVER_Trace* trace, int64_t* parent_id);


// Callback type for creating a trace. The callback receives an
// out-parameter `trace`, a model name, a model version and a user pointer.
// It returns nothing, so any result is communicated through `trace`.
// NOTE(review): whether the callback may leave `*trace` null to decline
// tracing is not visible here -- confirm.
typedef void (*TRTSERVER_TraceManagerCreateTraceFn_t)(
    TRTSERVER_Trace** trace, const char* model_name, int64_t version,
    void* userp);

// Callback type for releasing a trace. The callback receives the trace and
// two user pointers, `activity_userp` and `userp`. Returns nothing.
typedef void (*TRTSERVER_TraceManagerReleaseTraceFn_t)(
    TRTSERVER_Trace* trace, void* activity_userp, void* userp);

// Out-parameter `trace_manager` receives the new trace manager, which is
// built from a create callback, a release callback and a user pointer.
// Presumably `userp` is the value handed back to both callbacks -- confirm.
// Returns a TRTSERVER_Error pointer.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_TraceManagerNew(
    TRTSERVER_TraceManager** trace_manager,
    TRTSERVER_TraceManagerCreateTraceFn_t create_fn,
    TRTSERVER_TraceManagerReleaseTraceFn_t release_fn, void* userp);

// Takes a trace manager; returns a TRTSERVER_Error pointer. Presumably
// destroys the trace manager -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_TraceManagerDelete(
    TRTSERVER_TraceManager* trace_manager);


// Request flags. Values are explicit: NONE is 0 and the two sequence flags
// are the distinct bits 1 and 2.
// NOTE(review): presumably these are OR-ed together and passed as the
// uint32_t `flags` argument of TRTSERVER_InferenceRequestOptionsSetFlags --
// confirm.
typedef enum trtserver_requestoptionsflag_enum {
  TRTSERVER_REQUEST_FLAG_NONE = 0,
  TRTSERVER_REQUEST_FLAG_SEQUENCE_START = 1,
  TRTSERVER_REQUEST_FLAG_SEQUENCE_END = 2
} TRTSERVER_Request_Options_Flag;

// TRTSERVER_InferenceRequestOptions API. Every function returns a
// TRTSERVER_Error pointer.
// NOTE(review): descriptions of what each setter does are inferred from
// names and signatures; presumably a null return means success -- confirm
// against the implementation.

// Out-parameter `request_options` receives the new options object, created
// for a model name and an int64_t model version.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestOptionsNew(
    TRTSERVER_InferenceRequestOptions** request_options, const char* model_name,
    int64_t model_version);

// Passes a uint64_t `id` to the options object.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestOptionsSetId(
    TRTSERVER_InferenceRequestOptions* request_options, uint64_t id);

// Passes a uint32_t `flags` value to the options object; presumably a
// bitwise OR of TRTSERVER_Request_Options_Flag values -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestOptionsSetFlags(
    TRTSERVER_InferenceRequestOptions* request_options, uint32_t flags);

// Passes a uint64_t `correlation_id` to the options object.
TRTSERVER_EXPORT TRTSERVER_Error*
TRTSERVER_InferenceRequestOptionsSetCorrelationId(
    TRTSERVER_InferenceRequestOptions* request_options,
    uint64_t correlation_id);

// Passes a uint32_t `batch_size` to the options object.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestOptionsSetBatchSize(
    TRTSERVER_InferenceRequestOptions* request_options, uint32_t batch_size);

// Describes an input by name: `dims` points to `dim_count` int64_t
// dimension values (presumably -- confirm), and `batch_byte_size` is a
// uint64_t size for the input.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestOptionsAddInput(
    TRTSERVER_InferenceRequestOptions* request_options, const char* input_name,
    const int64_t* dims, uint64_t dim_count, uint64_t batch_byte_size);

// Names an output on the options object.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestOptionsAddOutput(
    TRTSERVER_InferenceRequestOptions* request_options,
    const char* output_name);

// Names an output on the options object together with a uint32_t `count`;
// by the function name, a classification output. What `count` bounds is
// not visible here -- TODO confirm.
TRTSERVER_EXPORT TRTSERVER_Error*
TRTSERVER_InferenceRequestOptionsAddClassificationOutput(
    TRTSERVER_InferenceRequestOptions* request_options, const char* output_name,
    uint32_t count);

// Takes an options object. Presumably destroys it -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestOptionsDelete(
    TRTSERVER_InferenceRequestOptions* request_options);


// TRTSERVER_InferenceRequestProvider API. Every function returns a
// TRTSERVER_Error pointer (presumably null on success -- confirm).

// Out-parameter `request_provider` receives the new provider, created for a
// server, a model name and version, and a request header given as a byte
// range (`request_header_base`, `request_header_byte_size`).
// NOTE(review): the expected encoding of the header bytes is not visible in
// this header -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestProviderNew(
    TRTSERVER_InferenceRequestProvider** request_provider,
    TRTSERVER_Server* server, const char* model_name, int64_t model_version,
    const char* request_header_base, size_t request_header_byte_size);

// Variant of the constructor that takes a TRTSERVER_InferenceRequestOptions
// object instead of a model name/version and request header bytes.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestProviderNewV2(
    TRTSERVER_InferenceRequestProvider** request_provider,
    TRTSERVER_Server* server,
    TRTSERVER_InferenceRequestOptions* request_options);

// Takes a request provider. Presumably destroys it -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceRequestProviderDelete(
    TRTSERVER_InferenceRequestProvider* request_provider);

// Out-parameter `byte_size` receives a uint64_t size for the input called
// `name`; by the function name, the byte size of that input for the batch.
TRTSERVER_EXPORT TRTSERVER_Error*
TRTSERVER_InferenceRequestProviderInputBatchByteSize(
    TRTSERVER_InferenceRequestProvider* request_provider, const char* name,
    uint64_t* byte_size);

// Supplies data for the input called `name` as a buffer (`base`,
// `byte_size`) tagged with its memory type and memory type id.
// NOTE(review): whether the buffer is copied or must outlive the request is
// not visible in this header -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error*
TRTSERVER_InferenceRequestProviderSetInputData(
    TRTSERVER_InferenceRequestProvider* request_provider, const char* name,
    const void* base, size_t byte_size, TRTSERVER_Memory_Type memory_type,
    int64_t memory_type_id);


// TRTSERVER_InferenceResponse API. Every function returns a TRTSERVER_Error
// pointer.

// Takes a response object. Presumably destroys it -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceResponseDelete(
    TRTSERVER_InferenceResponse* response);

// Takes only the response; there is no out-parameter, so the status is
// conveyed by the returned TRTSERVER_Error pointer itself.
// NOTE(review): presumably null means the inference succeeded -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceResponseStatus(
    TRTSERVER_InferenceResponse* response);

// Out-parameter `header` receives a TRTSERVER_Protobuf for the response.
// Presumably the caller releases it with TRTSERVER_ProtobufDelete --
// confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceResponseHeader(
    TRTSERVER_InferenceResponse* response, TRTSERVER_Protobuf** header);

// For the output called `name`, out-parameters receive the buffer pointer
// (`base`), its `byte_size`, and its memory type and memory type id.
// NOTE(review): lifetime of the memory behind `base` is not visible in this
// header -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_InferenceResponseOutputData(
    TRTSERVER_InferenceResponse* response, const char* name, const void** base,
    size_t* byte_size, TRTSERVER_Memory_Type* memory_type,
    int64_t* memory_type_id);


// Model control modes accepted by TRTSERVER_ServerOptionsSetModelControlMode.
// The enumerators have no explicit values, so they are 0 (NONE), 1 (POLL)
// and 2 (EXPLICIT).
// NOTE(review): the behavior selected by each mode is not visible here.
typedef enum trtserver_modelcontrolmode_enum {
  TRTSERVER_MODEL_CONTROL_NONE,
  TRTSERVER_MODEL_CONTROL_POLL,
  TRTSERVER_MODEL_CONTROL_EXPLICIT
} TRTSERVER_Model_Control_Mode;

// TRTSERVER_ServerOptions API. Every function returns a TRTSERVER_Error
// pointer, and every setter takes the options object as its first argument.
// NOTE(review): the effect of each option is inferred from its name; units,
// defaults and valid ranges are not visible in this header. Presumably a
// null return means success -- confirm against the implementation.

// Out-parameter `options` receives the new options object.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsNew(
    TRTSERVER_ServerOptions** options);

// Takes an options object. Presumably destroys it -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsDelete(
    TRTSERVER_ServerOptions* options);

// Passes a server id string.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetServerId(
    TRTSERVER_ServerOptions* options, const char* server_id);

// Passes a model repository path string.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetModelRepositoryPath(
    TRTSERVER_ServerOptions* options, const char* model_repository_path);

// Passes a TRTSERVER_Model_Control_Mode value.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetModelControlMode(
    TRTSERVER_ServerOptions* options, TRTSERVER_Model_Control_Mode mode);

// Passes a model name. Whether repeated calls accumulate names or replace
// the previous one is not visible here -- TODO confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetStartupModel(
    TRTSERVER_ServerOptions* options, const char* model_name);

// Passes a boolean `strict` for model configuration handling.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetStrictModelConfig(
    TRTSERVER_ServerOptions* options, bool strict);

// Passes a uint64_t `size`; by the function name, a byte size for a pinned
// memory pool.
TRTSERVER_EXPORT TRTSERVER_Error*
TRTSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(
    TRTSERVER_ServerOptions* options, uint64_t size);

// Passes a boolean `exit` for exit-on-error behavior.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetExitOnError(
    TRTSERVER_ServerOptions* options, bool exit);

// Passes a boolean `strict` for readiness reporting.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetStrictReadiness(
    TRTSERVER_ServerOptions* options, bool strict);

// Passes an unsigned `timeout` for exit; the unit is not stated in this
// header -- TODO confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetExitTimeout(
    TRTSERVER_ServerOptions* options, unsigned int timeout);

// Passes a boolean for info-level logging.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetLogInfo(
    TRTSERVER_ServerOptions* options, bool log);

// Passes a boolean for warning-level logging.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetLogWarn(
    TRTSERVER_ServerOptions* options, bool log);

// Passes a boolean for error-level logging.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetLogError(
    TRTSERVER_ServerOptions* options, bool log);

// Passes an integer verbose-logging `level` (an int, unlike the boolean
// info/warn/error switches above).
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetLogVerbose(
    TRTSERVER_ServerOptions* options, int level);

// Passes a boolean for metrics.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetMetrics(
    TRTSERVER_ServerOptions* options, bool metrics);

// Passes a boolean for GPU metrics.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerOptionsSetGpuMetrics(
    TRTSERVER_ServerOptions* options, bool gpu_metrics);

// Passes a boolean for TensorFlow soft placement.
TRTSERVER_EXPORT TRTSERVER_Error*
TRTSERVER_ServerOptionsSetTensorFlowSoftPlacement(
    TRTSERVER_ServerOptions* options, bool soft_placement);

// Passes a float `fraction` for TensorFlow GPU memory; presumably a value
// in [0, 1] -- TODO confirm the valid range.
TRTSERVER_EXPORT TRTSERVER_Error*
TRTSERVER_ServerOptionsSetTensorFlowGpuMemoryFraction(
    TRTSERVER_ServerOptions* options, float fraction);

// Passes a GPU device number, a number of virtual GPUs and a per-virtual-GPU
// memory amount (named as megabytes). The "Add" prefix suggests one call
// per GPU device -- TODO confirm.
TRTSERVER_EXPORT TRTSERVER_Error*
TRTSERVER_ServerOptionsAddTensorFlowVgpuMemoryLimits(
    TRTSERVER_ServerOptions* options, int gpu_device, int num_vgpus,
    uint64_t per_vgpu_memory_mbytes);


// TRTSERVER_Server API. Every function returns a TRTSERVER_Error pointer.
// NOTE(review): behavior is inferred from names and signatures; ownership
// of objects returned through out-parameters is not visible in this header.
// Presumably a null return means success -- confirm against implementation.

// Out-parameter `server` receives the new server, created from `options`.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerNew(
    TRTSERVER_Server** server, TRTSERVER_ServerOptions* options);

// Takes a server. Presumably destroys it -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerDelete(
    TRTSERVER_Server* server);

// Takes a server; by its name, stops it. How this relates to
// TRTSERVER_ServerDelete is not visible here -- TODO confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerStop(
    TRTSERVER_Server* server);

// Out-parameter `id` receives a C string for the server. Lifetime of the
// string is not visible here -- TODO confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerId(
    TRTSERVER_Server* server, const char** id);

// Takes a server; by its name, polls the model repository.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerPollModelRepository(
    TRTSERVER_Server* server);

// Out-parameter `live` receives a boolean liveness result.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerIsLive(
    TRTSERVER_Server* server, bool* live);

// Out-parameter `ready` receives a boolean readiness result.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerIsReady(
    TRTSERVER_Server* server, bool* ready);

// Out-parameter `status` receives a TRTSERVER_Protobuf for the server.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerStatus(
    TRTSERVER_Server* server, TRTSERVER_Protobuf** status);

// Out-parameter `status` receives a TRTSERVER_Protobuf for the model called
// `model_name`.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerModelStatus(
    TRTSERVER_Server* server, const char* model_name,
    TRTSERVER_Protobuf** status);

// Out-parameter `repository_index` receives a TRTSERVER_Protobuf.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerModelRepositoryIndex(
    TRTSERVER_Server* server, TRTSERVER_Protobuf** repository_index);

// Takes a server and a model name; by its name, loads that model.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerLoadModel(
    TRTSERVER_Server* server, const char* model_name);

// Takes a server and a model name; by its name, unloads that model.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerUnloadModel(
    TRTSERVER_Server* server, const char* model_name);

// Takes a server and a shared memory block; by its name, registers the
// block with the server.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerRegisterSharedMemory(
    TRTSERVER_Server* server, TRTSERVER_SharedMemoryBlock* shared_memory_block);

// Takes a server and a shared memory block; by its name, unregisters the
// block from the server.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerUnregisterSharedMemory(
    TRTSERVER_Server* server, TRTSERVER_SharedMemoryBlock* shared_memory_block);

// Takes only a server; by its name, unregisters every shared memory block.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerUnregisterAllSharedMemory(
    TRTSERVER_Server* server);


// Out-parameter `base` receives an address for the range (`offset`,
// `byte_size`) within `shared_memory_block`.
// NOTE(review): bounds checking of offset + byte_size against the block
// size is not visible here -- confirm before relying on it.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerSharedMemoryAddress(
    TRTSERVER_Server* server, TRTSERVER_SharedMemoryBlock* shared_memory_block,
    size_t offset, size_t byte_size, void** base);

// Out-parameter `status` receives a TRTSERVER_Protobuf; by the function
// name, describing shared memory.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerSharedMemoryStatus(
    TRTSERVER_Server* server, TRTSERVER_Protobuf** status);

// Out-parameter `metrics` receives a TRTSERVER_Metrics object. Presumably
// released with TRTSERVER_MetricsDelete -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerMetrics(
    TRTSERVER_Server* server, TRTSERVER_Metrics** metrics);

// Callback type for inference completion. The callback receives the server,
// the trace manager, the inference response and a user pointer. Returns
// nothing.
// NOTE(review): presumably the callback takes ownership of `response` and
// `trace_manager` and must delete them -- confirm against implementation.
typedef void (*TRTSERVER_InferenceCompleteFn_t)(
    TRTSERVER_Server* server, TRTSERVER_TraceManager* trace_manager,
    TRTSERVER_InferenceResponse* response, void* userp);

// Submits an inference described by `request_provider` to `server`, as the
// "Async" name indicates. The call takes a trace manager, a response
// allocator with its user pointer (`response_allocator_userp`), and a
// completion callback with its user pointer (`complete_userp`). Returns a
// TRTSERVER_Error pointer.
// NOTE(review): the thread on which `complete_fn` runs, whether
// `trace_manager` may be null, and how long `request_provider` must stay
// alive are not visible in this header -- confirm.
TRTSERVER_EXPORT TRTSERVER_Error* TRTSERVER_ServerInferAsync(
    TRTSERVER_Server* server, TRTSERVER_TraceManager* trace_manager,
    TRTSERVER_InferenceRequestProvider* request_provider,
    TRTSERVER_ResponseAllocator* response_allocator,
    void* response_allocator_userp, TRTSERVER_InferenceCompleteFn_t complete_fn,
    void* complete_userp);

#ifdef __cplusplus
}
#endif