nemo_export.tensorrt_mm_exporter#

Module Contents#

Classes#

TensorRTMMExporter

TensorRT multimodal exporter functionality has been removed.

API#

class nemo_export.tensorrt_mm_exporter.TensorRTMMExporter(
model_dir: str,
load_model: bool = True,
modality: str = 'vision',
)#

Bases: nemo_deploy.ITritonDeployable

TensorRT multimodal exporter functionality has been removed.

This class is kept for backward compatibility, but all of its methods raise NotImplementedError.

Initialization

abstractmethod export(
visual_checkpoint_path: str,
llm_checkpoint_path: str = None,
model_type: str = 'neva',
llm_model_type: str = 'llama',
processor_name: str = None,
tensor_parallel_size: int = 1,
max_input_len: int = 4096,
max_output_len: int = 256,
max_batch_size: int = 1,
vision_max_batch_size: int = 1,
max_multimodal_len: int = 3072,
dtype: str = 'bfloat16',
delete_existing_files: bool = True,
load_model: bool = True,
use_lora_plugin: str = None,
lora_target_modules: List[str] = None,
lora_checkpoint_path: str = None,
max_lora_rank: int = 64,
)#

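Export multimodal models to TensorRT-LLM (TRTLLM). This functionality has been removed; calling this method raises NotImplementedError.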

abstractmethod forward(
input_text: str,
input_media: str,
batch_size: int = 1,
max_output_len: int = 30,
top_k: int = 1,
top_p: float = 0.0,
temperature: float = 1.0,
repetition_penalty: float = 1.0,
num_beams: int = 1,
lora_uids: List[str] = None,
)#

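Run forward with a loaded TensorRT-LLM (TRTLLM) engine. This functionality has been removed; calling this method raises NotImplementedError.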

abstractmethod get_input_media_tensors()#

Get input media tensors.

abstract property get_triton_input#
abstract property get_triton_output#
abstractmethod triton_infer_fn(**inputs: numpy.ndarray)#

Triton inference function.

abstractmethod _load()#