nemo_export.multimodal.build#

Module Contents#

Functions#

build_trtllm_engine

Build TRTLLM engine using NeMo export.

build_mllama_trtllm_engine

Build mllama TRTLLM engine from HF.

export_visual_wrapper_onnx

Export visual wrapper to ONNX.

build_trt_engine

Build TRT engine from ONNX.

build_neva_engine

build_video_neva_engine

Build video neva visual engine.

build_mllama_visual_engine

Build mllama visual engine.

build_visual_engine

Build visual engine.

extract_lora_ckpt

Extract LoRA from checkpoint.

build_mllama_engine

Build mllama engine.

API#

nemo_export.multimodal.build.build_trtllm_engine(
model_dir: str,
visual_checkpoint_path: str,
llm_checkpoint_path: str = None,
model_type: str = 'neva',
llm_model_type: str = 'llama',
tensor_parallelism_size: int = 1,
max_input_len: int = 256,
max_output_len: int = 256,
max_batch_size: int = 1,
max_multimodal_len: int = 1024,
dtype: str = 'bfloat16',
use_lora_plugin: str = None,
lora_target_modules: List[str] = None,
max_lora_rank: int = 64,
lora_ckpt_list: List[str] = None,
)#

Build TRTLLM engine using NeMo export.

nemo_export.multimodal.build.build_mllama_trtllm_engine(
model_dir: str,
hf_model_path: str,
tensor_parallelism_size: int = 1,
max_input_len: int = 256,
max_output_len: int = 256,
max_batch_size: int = 1,
max_multimodal_len: int = 1024,
dtype: str = 'bfloat16',
use_lora_plugin: str = None,
lora_target_modules: List[str] = None,
max_lora_rank: int = 64,
lora_ckpt_list: List[str] = None,
)#

Build mllama TRTLLM engine from HF.

nemo_export.multimodal.build.export_visual_wrapper_onnx(
visual_wrapper,
input,
output_dir,
input_names=['input'],
dynamic_axes={'input': {0: 'batch'}},
)#

Export visual wrapper to ONNX.

nemo_export.multimodal.build.build_trt_engine(
model_type,
input_sizes,
output_dir,
vision_max_batch_size,
dtype=torch.bfloat16,
image_size=None,
num_frames=None,
nemo_config=None,
part_name='visual_encoder',
)#

Build TRT engine from ONNX.

nemo_export.multimodal.build.build_neva_engine(
model_type: str,
model_dir: str,
visual_checkpoint_path: str,
vision_max_batch_size: int = 1,
)#

nemo_export.multimodal.build.build_video_neva_engine(
model_dir: str,
visual_checkpoint_path: str,
vision_max_batch_size: int = 1,
)#

Build video neva visual engine.

nemo_export.multimodal.build.build_mllama_visual_engine(
model_dir: str,
hf_model_path: str,
processor_name: str = 'meta-llama/Llama-3.2-11B-Vision-Instruct',
vision_max_batch_size: int = 1,
)#

Build mllama visual engine.

nemo_export.multimodal.build.build_visual_engine(
model_dir: str,
visual_checkpoint_path: str,
model_type: str = 'neva',
vision_max_batch_size: int = 1,
)#

Build visual engine.

nemo_export.multimodal.build.extract_lora_ckpt(lora_ckpt: str, output_dir: str)#

Extract LoRA from checkpoint.

nemo_export.multimodal.build.build_mllama_engine(
model_dir: str,
checkpoint_path: str,
processor_name: str = 'meta-llama/Llama-3.2-11B-Vision-Instruct',
vision_max_batch_size: int = 1,
tensor_parallelism_size: int = 1,
max_input_len: int = 256,
max_output_len: int = 256,
max_batch_size: int = 1,
max_multimodal_len: int = 1024,
dtype: str = 'bfloat16',
)#

Build mllama engine.