nemo_export.multimodal.run

Module Contents

Classes

MultimodalModelRunner

Functions

trt_dtype_to_torch

API

nemo_export.multimodal.run.trt_dtype_to_torch(dtype)[source]#

class nemo_export.multimodal.run.MultimodalModelRunner(
visual_engine_dir,
llm_engine_dir,
modality='vision',
)[source]#

Initialization

init_tokenizer(llm_engine_dir)[source]#
init_image_encoder(visual_engine_dir)[source]#
init_vision_preprocessor(visual_encoder_dir)[source]#
init_llm(llm_engine_dir)[source]#
video_preprocess(video_path)[source]#
insert_tokens_by_index(input_ids, num_frames)[source]#
preprocess(
warmup,
pre_prompt,
post_prompt,
image,
attention_mask,
batch_size,
)[source]#
static tokenizer_image_token(
batch_size,
prompt,
tokenizer,
image_token_index=-200,
)[source]#
split_prompt_by_images(tensor)[source]#
generate(
pre_prompt,
post_prompt,
image,
decoder_input_ids,
max_new_tokens,
attention_mask,
warmup,
batch_size,
top_k,
top_p,
temperature,
repetition_penalty,
num_beams,
lora_uids=None,
)[source]#
get_visual_features(image, attention_mask)[source]#
setup_fake_prompts(
visual_features,
pre_input_ids,
post_input_ids,
input_lengths,
)[source]#
setup_fake_prompts_vila(
batch_size,
visual_features,
split_input_ids,
input_lengths,
)[source]#
preprocess_lita_visual(visual_features, config)[source]#
ptuning_setup(prompt_table, input_ids, input_lengths)[source]#
expand2square_pt(images, background_color)[source]#
load_video(config, video_path, processor, num_frames=None)[source]#
preprocess_frames(frames, config, processor)[source]#
get_num_sample_frames(config, vid_len)[source]#
process_lita_video(nemo_config, video_path, image_processor)[source]#
process_image(image_file, image_processor, nemo_config, image_folder)[source]#
process_vila_img(images)[source]#
setup_inputs(input_text, raw_image, batch_size)[source]#
run(
input_text,
input_image,
max_new_tokens,
batch_size,
top_k,
top_p,
temperature,
repetition_penalty,
num_beams,
lora_uids=None,
run_profiling=False,
check_accuracy=False,
)[source]#
print_result(
input_text,
output_text,
batch_size,
num_beams,
run_profiling,
check_accuracy,
)[source]#
load_test_media(input_media)[source]#