Program Listing for File request.h

Return to documentation for file (src/clients/c++/library/request.h)

// Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once


#ifdef _MSC_VER
#ifdef DLL_EXPORTING
#define DECLSPEC __declspec(dllexport)
#else
#define DECLSPEC __declspec(dllimport)
#endif
#else
#define DECLSPEC
#endif

#cmakedefine TRTIS_CLIENT_HEADER_FLAT 1

#include <algorithm>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <ostream>
#include <string>
#include <vector>
#ifdef TRTIS_CLIENT_HEADER_FLAT
#include "api.pb.h"
#include "request_status.pb.h"
#include "server_status.pb.h"
#else
#include "src/core/api.pb.h"
#include "src/core/request_status.pb.h"
#include "src/core/server_status.pb.h"
#endif

#ifdef TRTIS_ENABLE_GPU
#include <cuda_runtime_api.h>
#else
struct cudaIpcMemHandle_t {};
#endif  // TRTIS_ENABLE_GPU

namespace nvidia { namespace inferenceserver {

// From model_config.h, repeat here to avoid introducing dependence on
// that header.
using CorrelationID = uint64_t;
using DimsList = ::google::protobuf::RepeatedField<::google::protobuf::int64>;

}}  // namespace nvidia::inferenceserver

namespace nvidia { namespace inferenceserver { namespace client {

//==============================================================================
class DECLSPEC Error {
 public:
  explicit Error(const RequestStatus& status);

  explicit Error(RequestStatusCode code = RequestStatusCode::SUCCESS);

  explicit Error(RequestStatusCode code, const std::string& msg);

  RequestStatusCode Code() const { return code_; }

  const std::string& Message() const { return msg_; }

  const std::string& ServerId() const { return server_id_; }

  uint64_t RequestId() const { return request_id_; }

  bool IsOk() const { return code_ == RequestStatusCode::SUCCESS; }

  static const Error Success;

 private:
  friend std::ostream& operator<<(std::ostream&, const Error&);
  RequestStatusCode code_;
  std::string msg_;
  std::string server_id_;
  uint64_t request_id_;
};
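
// Example (sketch): every client call returns an Error; a typical caller
// checks IsOk() and streams the error for details. 'ctx' and 'results' are
// assumed to exist and are not declared in this header.
//
//   Error err = ctx->Run(&results);
//   if (!err.IsOk()) {
//     std::cerr << "inference failed: " << err << std::endl;
//   }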

//==============================================================================
class ServerHealthContext {
 public:
  virtual ~ServerHealthContext() = 0;

  virtual Error GetReady(bool* ready) = 0;

  virtual Error GetLive(bool* live) = 0;
};
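
// Example (sketch): 'health_ctx' is assumed to be a concrete
// ServerHealthContext obtained from an HTTP or gRPC client factory that is
// not declared in this header.
//
//   bool live = false, ready = false;
//   health_ctx->GetLive(&live);
//   health_ctx->GetReady(&ready);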

//==============================================================================
class ServerStatusContext {
 public:
  virtual ~ServerStatusContext() = 0;

  virtual Error GetServerStatus(ServerStatus* status) = 0;
};

//==============================================================================
class ModelRepositoryContext {
 public:
  virtual ~ModelRepositoryContext() = 0;

  virtual Error GetModelRepositoryIndex(ModelRepositoryIndex* index) = 0;
};

//==============================================================================
class InferContext {
 public:
  //==============
  class Input {
   public:
    virtual ~Input() = 0;

    virtual const std::string& Name() const = 0;

    virtual int64_t ByteSize() const = 0;

    virtual size_t TotalByteSize() const = 0;

    virtual DataType DType() const = 0;

    virtual bool IsShapeTensor() const = 0;

    virtual ModelInput::Format Format() const = 0;

    virtual const DimsList& Dims() const = 0;

    virtual Error Reset() = 0;

    virtual const std::vector<int64_t>& Shape() const = 0;

    virtual Error SetShape(const std::vector<int64_t>& dims) = 0;

    virtual Error SetRaw(const uint8_t* input, size_t input_byte_size) = 0;

    virtual Error SetRaw(const std::vector<uint8_t>& input) = 0;

    virtual Error SetFromString(const std::vector<std::string>& input) = 0;

    virtual Error SetSharedMemory(
        const std::string& name, size_t offset, size_t byte_size) = 0;
  };
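
  // Example (sketch): preparing an input before Run(). 'ctx', 'data' and
  // 'data_byte_size' are assumed; setting the shape is only needed when the
  // model declares variable-size dimensions.
  //
  //   std::shared_ptr<InferContext::Input> input;
  //   ctx->GetInput("INPUT0", &input);
  //   input->Reset();
  //   input->SetShape({3, 224, 224});
  //   input->SetRaw(data, data_byte_size);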

  //==============
  class Output {
   public:
    virtual ~Output() = 0;

    virtual const std::string& Name() const = 0;

    virtual DataType DType() const = 0;

    virtual bool IsShapeTensor() const = 0;

    virtual const DimsList& Dims() const = 0;
  };

  //==============
  class Result {
   public:
    virtual ~Result() = 0;

    enum ResultFormat {
      RAW = 0,

      CLASS = 1
    };

    virtual const std::string& ModelName() const = 0;

    virtual int64_t ModelVersion() const = 0;

    virtual const std::shared_ptr<Output> GetOutput() const = 0;

    virtual Error GetRawShape(std::vector<int64_t>* shape) const = 0;

    virtual Error GetRaw(
        size_t batch_idx, const std::vector<uint8_t>** buf) const = 0;

    virtual Error GetRaw(
        size_t batch_idx, const uint8_t** buf, size_t* byte_size) const = 0;

    virtual Error GetRawAtCursor(
        size_t batch_idx, const uint8_t** buf, size_t adv_byte_size) = 0;

    template <typename T>
    Error GetRawAtCursor(size_t batch_idx, T* out);

    struct ClassResult {
      size_t idx;
      float value;
      std::string label;
    };

    virtual Error GetClassCount(size_t batch_idx, size_t* cnt) const = 0;

    virtual Error GetClassAtCursor(size_t batch_idx, ClassResult* result) = 0;

    virtual Error ResetCursors() = 0;

    virtual Error ResetCursor(size_t batch_idx) = 0;
  };
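
  // Example (sketch): reading a RAW result and a CLASS result. 'result' is
  // assumed to be one entry of the ResultMap returned by Run().
  //
  //   std::vector<int64_t> shape;
  //   result->GetRawShape(&shape);
  //   const std::vector<uint8_t>* raw;
  //   result->GetRaw(0 /* batch_idx */, &raw);
  //
  //   size_t cnt;
  //   result->GetClassCount(0, &cnt);
  //   InferContext::Result::ClassResult cls;
  //   result->GetClassAtCursor(0, &cls);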

  //==============
  class Options {
   public:
    virtual ~Options() = 0;

    static Error Create(std::unique_ptr<Options>* options);

    virtual bool Flag(InferRequestHeader::Flag flag) const = 0;

    virtual void SetFlag(InferRequestHeader::Flag flag, bool value) = 0;

    virtual uint32_t Flags() const = 0;

    virtual void SetFlags(uint32_t flags) = 0;

    virtual size_t BatchSize() const = 0;

    virtual void SetBatchSize(size_t batch_size) = 0;

    virtual CorrelationID CorrelationId() const = 0;

    virtual void SetCorrelationId(CorrelationID correlation_id) = 0;

    virtual Error AddRawResult(
        const std::shared_ptr<InferContext::Output>& output) = 0;

    virtual Error AddClassResult(
        const std::shared_ptr<InferContext::Output>& output, uint64_t k) = 0;

    // Results placed in the shared memory region must not be accessed
    // until this output is ready (that is, until after the Run() call(s)
    // have completed).
    virtual Error AddSharedMemoryResult(
        const std::shared_ptr<InferContext::Output>& output,
        const std::string& name, size_t offset, size_t byte_size) = 0;
  };
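
  // Example (sketch): building run options and requesting a raw result for
  // one output. 'ctx' and 'output' are assumed.
  //
  //   std::unique_ptr<InferContext::Options> options;
  //   InferContext::Options::Create(&options);
  //   options->SetBatchSize(1);
  //   options->AddRawResult(output);
  //   ctx->SetRunOptions(*options);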

  //==============
  class Request {
   public:
    virtual ~Request() = 0;

    virtual uint64_t Id() const = 0;
  };

  //==============
  struct Stat {
    size_t completed_request_count;

    uint64_t cumulative_total_request_time_ns;

    uint64_t cumulative_send_time_ns;

    uint64_t cumulative_receive_time_ns;

    Stat()
        : completed_request_count(0), cumulative_total_request_time_ns(0),
          cumulative_send_time_ns(0), cumulative_receive_time_ns(0)
    {
    }
  };
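
  // Example (sketch): deriving an average end-to-end request latency from
  // the cumulative counters. 'ctx' is assumed.
  //
  //   InferContext::Stat stat;
  //   ctx->GetStat(&stat);
  //   double avg_request_ns =
  //       (stat.completed_request_count > 0)
  //           ? static_cast<double>(stat.cumulative_total_request_time_ns) /
  //                 stat.completed_request_count
  //           : 0.0;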

 public:
  using ResultMap = std::map<std::string, std::unique_ptr<Result>>;
  using OnCompleteFn =
      std::function<void(InferContext*, const std::shared_ptr<Request>&)>;

  virtual ~InferContext() = 0;

  virtual const std::string& ModelName() const = 0;

  virtual int64_t ModelVersion() const = 0;

  virtual uint64_t MaxBatchSize() const = 0;

  virtual CorrelationID CorrelationId() const = 0;

  virtual const std::vector<std::shared_ptr<Input>>& Inputs() const = 0;

  virtual const std::vector<std::shared_ptr<Output>>& Outputs() const = 0;

  virtual Error GetInput(
      const std::string& name, std::shared_ptr<Input>* input) const = 0;

  virtual Error GetOutput(
      const std::string& name, std::shared_ptr<Output>* output) const = 0;

  virtual Error SetRunOptions(const Options& options) = 0;

  virtual int64_t ByteSize(const DimsList& shape, DataType dtype) const = 0;

  virtual Error GetStat(Stat* stat) const = 0;

  virtual Error Run(ResultMap* results) = 0;

  virtual Error AsyncRun(OnCompleteFn callback) = 0;

  virtual Error GetAsyncRunResults(
      const std::shared_ptr<Request>& async_request,
      ResultMap* results) = 0;
};
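
// Example (sketch): a minimal synchronous inference, assuming 'ctx' was
// created by an HTTP or gRPC client factory that is not declared in this
// header, and assuming 'data'/'byte_size' hold the serialized input tensor.
//
//   std::unique_ptr<InferContext::Options> options;
//   InferContext::Options::Create(&options);
//   options->SetBatchSize(1);
//   options->AddRawResult(ctx->Outputs()[0]);
//   ctx->SetRunOptions(*options);
//
//   ctx->Inputs()[0]->Reset();
//   ctx->Inputs()[0]->SetRaw(data, byte_size);
//
//   InferContext::ResultMap results;
//   ctx->Run(&results);
//
// AsyncRun() plus GetAsyncRunResults() provide the non-blocking alternative
// to Run().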

//==============================================================================
class ModelControlContext {
 public:
  virtual ~ModelControlContext() = 0;

  virtual Error Load(const std::string& model_name) = 0;

  virtual Error Unload(const std::string& model_name) = 0;
};
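
// Example (sketch): 'control_ctx' is assumed to be a concrete
// ModelControlContext; the model name is illustrative.
//
//   control_ctx->Load("resnet50");
//   // ... run inference against the model ...
//   control_ctx->Unload("resnet50");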

//==============================================================================
class SharedMemoryControlContext {
 public:
  virtual ~SharedMemoryControlContext() = 0;

  virtual Error RegisterSharedMemory(
      const std::string& name, const std::string& shm_key, size_t offset,
      size_t byte_size) = 0;

  virtual Error RegisterCudaSharedMemory(
      const std::string& name, const cudaIpcMemHandle_t& cuda_shm_handle,
      size_t byte_size, int device_id) = 0;

  virtual Error UnregisterSharedMemory(const std::string& name) = 0;

  virtual Error UnregisterAllSharedMemory() = 0;

  virtual Error GetSharedMemoryStatus(SharedMemoryStatus* status) = 0;
};
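
// Example (sketch): registering a system shared memory region and pointing
// an input at it. 'shm_ctx', 'input' and 'byte_size' are assumed, and
// "/trtis_shm" is a hypothetical POSIX shared-memory key.
//
//   shm_ctx->RegisterSharedMemory("input_data", "/trtis_shm", 0, byte_size);
//   input->SetSharedMemory("input_data", 0, byte_size);
//   // ... Run() ...
//   shm_ctx->UnregisterSharedMemory("input_data");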

//==============================================================================

std::ostream& operator<<(std::ostream&, const Error&);

template <>
Error InferContext::Result::GetRawAtCursor(size_t batch_idx, std::string* out);

template <typename T>
Error
InferContext::Result::GetRawAtCursor(size_t batch_idx, T* out)
{
  const uint8_t* buf;
  Error err = GetRawAtCursor(batch_idx, &buf, sizeof(T));
  if (!err.IsOk()) {
    return err;
  }

  std::copy(buf, buf + sizeof(T), reinterpret_cast<uint8_t*>(out));
  return Error::Success;
}
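
// Example (sketch): the typed GetRawAtCursor() reads sizeof(T) bytes at the
// result cursor and advances it, so repeated calls walk the raw output.
// 'result' is assumed.
//
//   float value;
//   result->GetRawAtCursor(0 /* batch_idx */, &value);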

}}}  // namespace nvidia::inferenceserver::client