Index _ | A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | U | V | W | Z _ __all__ (in module nemo_deploy) (in module nemo_deploy.nlp) (in module nemo_deploy.package_info) (in module nemo_deploy.service) (in module nemo_export) (in module nemo_export.package_info) (in module nemo_export.trt_llm.qnemo) (in module nemo_export.utils) __contains__() (nemo_export.tarutils.ZarrPathStore method) __del__() (nemo_export.tarutils.TarPath method) __delitem__() (nemo_export.tarutils.ZarrPathStore method) __enter__() (nemo_export.tarutils.TarPath method) __exit__() (nemo_export.tarutils.TarPath method) __getitem__() (nemo_export.tarutils.ZarrPathStore method) __iter__() (nemo_export.tarutils.ZarrPathStore method) __len__() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) (nemo_export.tarutils.ZarrPathStore method) __setitem__() (nemo_export.tarutils.ZarrPathStore method) __str__() (nemo_export.tarutils.TarPath method) __truediv__() (nemo_export.tarutils.TarPath method) _add_request_to_engine() (nemo_export.vllm_exporter.vLLMExporter method) _add_triton_request_to_engine() (nemo_export.vllm_exporter.vLLMExporter method) _change_paths_to_absolute_paths() (nemo_export.vllm.model_config.NemoModelConfig static method) _export_to_nim_format() (nemo_export.tensorrt_llm.TensorRTLLM method) _export_to_onnx() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) _forward() (in module nemo_export.trt_llm.tensorrt_llm_run) _forward_regular() (nemo_export.vllm_exporter.vLLMExporter method) _forward_streaming() (nemo_export.vllm_exporter.vLLMExporter method) _helper_fun() (in module nemo_deploy.service.fastapi_interface_to_pytriton) _infer_fn() (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 method) (nemo_export.tensorrt_llm.TensorRTLLM method) _infer_fn_common() (nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy method) _is_model_deployable() (nemo_deploy.deploy_base.DeployBase method) _load() (in module nemo_export.trt_llm.tensorrt_llm_run) (nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy method) (nemo_export.tensorrt_llm.TensorRTLLM method) (nemo_export.tensorrt_mm_exporter.TensorRTMMExporter method) _load_config_file() (nemo_export.tensorrt_llm.TensorRTLLM method) _load_hf_arguments() (nemo_export.vllm.model_config.NemoModelConfig method) _load_hf_model() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) _load_nemo_checkpoint_state() (nemo_export.vllm.model_loader.NemoModelLoader static method) _load_runtime() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) _mock_import() (in module nemo_export.utils._mock_import) _MODEL_CONVERTERS (in module nemo_export.vllm.model_converters) _override_layer_precision_to_fp32() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) _override_layernorm_precision_to_fp32() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) _override_layers_to_fp32() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) _pad_logits() (nemo_export.tensorrt_llm.TensorRTLLM method) _prepare_lora_checkpoints() (nemo_export.vllm_exporter.vLLMExporter method) _setup_unique_distributed_parameters() (nemo_deploy.nlp.hf_deployable_ray.HFRayDeployable method) _standardize_nemo2_naming() (nemo_export.vllm.model_loader.NemoModelLoader static method) _triton_request_timeout (nemo_deploy.service.rest_model_api.TritonSettings attribute) _triton_service_ip (nemo_deploy.service.fastapi_interface_to_pytriton.TritonSettings attribute) (nemo_deploy.service.rest_model_api.TritonSettings attribute) _triton_service_port (nemo_deploy.service.fastapi_interface_to_pytriton.TritonSettings attribute) (nemo_deploy.service.rest_model_api.TritonSettings attribute) A add_bos (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMHostContext attribute) add_lora_models() (nemo_export.vllm_hf_exporter.vLLMHFExporter method) add_special_tokens() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) additional_special_tokens_ids (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) app (in module nemo_deploy.nlp.hf_deployable_ray) (in module nemo_deploy.nlp.megatronllm_deployable_ray) (in module nemo_deploy.service.fastapi_interface_to_pytriton) (in module nemo_deploy.service.rest_model_api) (in module nemo_export.tensorrt_llm_deployable_ray) apply_chat_template() (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 method) B BaseRequest (class in nemo_deploy.service.fastapi_interface_to_pytriton) batch (in module nemo_export.onnx_llm_exporter) (in module nemo_export.tensorrt_mm_exporter) (in module nemo_export.vllm_exporter) batch_decode() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) (nemo_export.tiktoken_tokenizer.TiktokenTokenizer method) bos_token_id (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) (nemo_export.tiktoken_tokenizer.TiktokenTokenizer property) broadcast_list() (in module nemo_deploy.utils) build_mllama_engine() (in module nemo_export.multimodal.build) build_mllama_trtllm_engine() (in module nemo_export.multimodal.build) build_mllama_visual_engine() (in module nemo_export.multimodal.build) build_neva_engine() (in module nemo_export.multimodal.build) build_tokenizer() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) build_trt_engine() (in module nemo_export.multimodal.build) build_trtllm_engine() (in module nemo_export.multimodal.build) build_video_neva_engine() (in module nemo_export.multimodal.build) build_visual_engine() (in module nemo_export.multimodal.build) C cast_output() (in module nemo_deploy.utils) chat_completions() (nemo_deploy.nlp.hf_deployable_ray.HFRayDeployable method) (nemo_deploy.nlp.megatronllm_deployable_ray.MegatronRayDeployable method) (nemo_export.tensorrt_llm_deployable_ray.TensorRTLLMRayDeployable method) chat_completions_v1() (in module nemo_deploy.service.fastapi_interface_to_pytriton) ChatCompletionRequest (class in nemo_deploy.service.fastapi_interface_to_pytriton) check_triton_health() (in module nemo_deploy.service.fastapi_interface_to_pytriton) (in module nemo_deploy.service.rest_model_api) cls_id (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) CompletionRequest (class in nemo_deploy.service.fastapi_interface_to_pytriton) (class in nemo_deploy.service.rest_model_api) completions() (nemo_deploy.nlp.hf_deployable_ray.HFRayDeployable method) (nemo_deploy.nlp.megatronllm_deployable_ray.MegatronRayDeployable method) (nemo_export.tensorrt_llm_deployable_ray.TensorRTLLMRayDeployable method) completions_v1() (in module nemo_deploy.service.fastapi_interface_to_pytriton) (in module nemo_deploy.service.rest_model_api) CONFIG_NAME (in module nemo_export.trt_llm.qnemo.utils) contains_extra_states() (in module nemo_export.utils.model_loader) convert_and_store_nemo_weights() (nemo_export.vllm.model_loader.NemoModelLoader static method) convert_config() (nemo_export.vllm.model_converters.ModelConverter method) (nemo_export.vllm.model_converters.Starcoder2Converter method) convert_ids_to_tokens() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) convert_lora_nemo_to_canonical() (in module nemo_export.utils.lora_converter) convert_lora_weights_to_canonical() (in module nemo_export.utils.lora_converter) convert_numpy() (in module nemo_deploy.service.fastapi_interface_to_pytriton) convert_tokens_to_string() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) convert_weights() (nemo_export.vllm.model_converters.GemmaConverter method) (nemo_export.vllm.model_converters.LlamaConverter method) (nemo_export.vllm.model_converters.MixtralConverter method) (nemo_export.vllm.model_converters.ModelConverter method) (nemo_export.vllm.model_converters.Starcoder2Converter method) D decode() (nemo_export.tiktoken_tokenizer.TiktokenTokenizer method) decoder (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMWorkerContext attribute) DEFAULT_TIKTOKEN_MAX_VOCAB (in module nemo_export.tiktoken_tokenizer) deploy() (nemo_deploy.deploy_base.DeployBase method) (nemo_deploy.deploy_pytriton.DeployPyTriton method) DeployBase (class in nemo_deploy.deploy_base) DeployPyTriton (class in nemo_deploy.deploy_pytriton) DeployRay (class in nemo_deploy.deploy_ray) determine_quantization_settings() (in module nemo_export.trt_llm.utils) dict_to_str() (in module nemo_deploy.nlp.megatronllm_deployable) (in module nemo_deploy.service.fastapi_interface_to_pytriton) download_model() (nemo_export.vllm.model_loader.NemoModelLoader method) E echo (nemo_deploy.service.fastapi_interface_to_pytriton.CompletionRequest attribute) encode() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) (nemo_export.tiktoken_tokenizer.TiktokenTokenizer method) eos_token_id (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) (nemo_export.tiktoken_tokenizer.TiktokenTokenizer property) executor (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMHostContext attribute) exists() (nemo_export.tarutils.TarPath method) expand2square_pt() (nemo_export.multimodal.run.MultimodalModelRunner method) export() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) (nemo_export.tensorrt_llm.TensorRTLLM method) (nemo_export.tensorrt_mm_exporter.TensorRTMMExporter method) (nemo_export.vllm_exporter.vLLMExporter method) (nemo_export.vllm_hf_exporter.vLLMHFExporter method) export_hf_model() (nemo_export.tensorrt_llm.TensorRTLLM method) export_onnx_to_trt() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) export_visual_wrapper_onnx() (in module nemo_export.multimodal.build) EXTRA_STATE (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) extract_lora_ckpt() (in module nemo_export.multimodal.build) F find_available_port() (in module nemo_deploy.ray_utils) forward() (in module nemo_export.trt_llm.tensorrt_llm_run) (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) (nemo_export.tensorrt_llm.TensorRTLLM method) (nemo_export.tensorrt_mm_exporter.TensorRTMMExporter method) (nemo_export.vllm_exporter.vLLMExporter method) (nemo_export.vllm_hf_exporter.vLLMHFExporter method) frame_len() (nemo_deploy.multimodal.query_multimodal.NemoQueryMultimodal method) frequency_penalty (nemo_deploy.service.rest_model_api.CompletionRequest attribute) G GemmaConverter (class in nemo_export.vllm.model_converters) generate() (in module nemo_export.trt_llm.tensorrt_llm_run) (nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy method) (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 method) (nemo_deploy.nlp.trtllm_api_deployable.TensorRTLLMAPIDeployable method) (nemo_export.multimodal.run.MultimodalModelRunner method) generate_other_ranks() (nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy method) (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 method) get_added_vocab() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) get_architecture() (nemo_export.vllm.model_converters.GemmaConverter method) (nemo_export.vllm.model_converters.LlamaConverter method) (nemo_export.vllm.model_converters.MixtralConverter method) (nemo_export.vllm.model_converters.ModelConverter method) (nemo_export.vllm.model_converters.Starcoder2Converter method) get_deployable() (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeploy static method) get_example_inputs() (in module nemo_export.utils.utils) get_hf_model_dtype() (nemo_export.tensorrt_llm.TensorRTLLM method) get_hf_model_type() (nemo_export.tensorrt_llm.TensorRTLLM method) get_hidden_size (nemo_export.tensorrt_llm.TensorRTLLM property) get_input_media_tensors() (nemo_export.tensorrt_mm_exporter.TensorRTMMExporter method) get_model (nemo_export.onnx_llm_exporter.OnnxLLMExporter property) get_model_converter() (in module nemo_export.vllm.model_converters) get_model_device_type() (in module nemo_export.utils.utils) get_model_input_names (nemo_export.onnx_llm_exporter.OnnxLLMExporter property) get_model_type() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) get_num_sample_frames() (nemo_export.multimodal.run.MultimodalModelRunner method) get_subsampled_frames() (nemo_deploy.multimodal.query_multimodal.NemoQueryMultimodal method) get_supported_hf_model_mapping (nemo_export.tensorrt_llm.TensorRTLLM property) get_supported_models_list (nemo_export.tensorrt_llm.TensorRTLLM property) get_tokenizer (nemo_export.onnx_llm_exporter.OnnxLLMExporter property) get_tokenizer() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) get_tokenizer_from_nemo2_context() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) get_transformer_config() (nemo_export.tensorrt_llm.TensorRTLLM method) get_triton_input (nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy property) (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 property) (nemo_deploy.nlp.trtllm_api_deployable.TensorRTLLMAPIDeployable property) (nemo_export.onnx_llm_exporter.OnnxLLMExporter property) (nemo_export.tensorrt_llm.TensorRTLLM property) (nemo_export.tensorrt_mm_exporter.TensorRTMMExporter property) (nemo_export.vllm_exporter.vLLMExporter property) (nemo_export.vllm_hf_exporter.vLLMHFExporter property) get_triton_input() (nemo_deploy.triton_deployable.ITritonDeployable method) get_triton_output (nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy property) (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 property) (nemo_deploy.nlp.trtllm_api_deployable.TensorRTLLMAPIDeployable property) (nemo_export.onnx_llm_exporter.OnnxLLMExporter property) (nemo_export.tensorrt_llm.TensorRTLLM property) (nemo_export.tensorrt_mm_exporter.TensorRTMMExporter property) (nemo_export.vllm_exporter.vLLMExporter property) (nemo_export.vllm_hf_exporter.vLLMHFExporter property) get_triton_output() (nemo_deploy.triton_deployable.ITritonDeployable method) get_visual_features() (nemo_export.multimodal.run.MultimodalModelRunner method) get_weights_dtype() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) glob() (nemo_export.tarutils.TarPath method) H health_check() (in module nemo_deploy.service.fastapi_interface_to_pytriton) (in module nemo_deploy.service.rest_model_api) (nemo_deploy.nlp.hf_deployable_ray.HFRayDeployable method) (nemo_deploy.nlp.megatronllm_deployable_ray.MegatronRayDeployable method) (nemo_export.tensorrt_llm_deployable_ray.TensorRTLLMRayDeployable method) HFRayDeployable (class in nemo_deploy.nlp.hf_deployable_ray) HuggingFaceLLMDeploy (class in nemo_deploy.nlp.hf_deployable) I ids_to_tokens() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) infer() (nemo_deploy.nlp.megatronllm_deployable_ray.ModelWorker method) init_image_encoder() (nemo_export.multimodal.run.MultimodalModelRunner method) init_llm() (nemo_export.multimodal.run.MultimodalModelRunner method) init_tokenizer() (nemo_export.multimodal.run.MultimodalModelRunner method) init_vision_preprocessor() (nemo_export.multimodal.run.MultimodalModelRunner method) insert_tokens_by_index() (nemo_export.multimodal.run.MultimodalModelRunner method) is_dir() (nemo_export.tarutils.TarPath method) is_fast (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) is_file() (nemo_export.tarutils.TarPath method) is_nemo2_checkpoint() (in module nemo_export.utils.utils) is_nemo_tarfile() (in module nemo_export.utils.utils) is_port_in_use() (in module nemo_deploy.ray_utils) is_qnemo_checkpoint() (in module nemo_export.trt_llm.qnemo.utils) is_rank() (in module nemo_export.trt_llm.utils) iterdir() (nemo_export.tarutils.TarPath method) ITritonDeployable (class in nemo_deploy.triton_deployable) K keys() (nemo_export.tarutils.ZarrPathStore method) L list_models() (nemo_deploy.nlp.hf_deployable_ray.HFRayDeployable method) (nemo_deploy.nlp.megatronllm_deployable_ray.MegatronRayDeployable method) (nemo_export.tensorrt_llm_deployable_ray.TensorRTLLMRayDeployable method) LlamaConverter (class in nemo_export.vllm.model_converters) load() (in module nemo_export.trt_llm.tensorrt_llm_run) load_distributed_model_weights() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) load_extra_state_from_bytes() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) load_model() (nemo_export.vllm.model_loader.NemoModelLoader method) load_model_weights() (in module nemo_export.utils.model_loader) load_nemo_config() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) load_nemo_model() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) load_sharded_metadata_torch_dist() (in module nemo_export.utils.model_loader) load_sharded_metadata_zarr() (in module nemo_export.utils.model_loader) load_sharded_pickle_extra_state_scale() (in module nemo_export.utils.model_loader) load_test_media() (nemo_export.multimodal.run.MultimodalModelRunner method) load_video() (nemo_export.multimodal.run.MultimodalModelRunner method) LOGGER (in module nemo_deploy.deploy_base) (in module nemo_deploy.deploy_pytriton) (in module nemo_deploy.deploy_ray) (in module nemo_deploy.nlp.hf_deployable) (in module nemo_deploy.nlp.hf_deployable_ray) (in module nemo_deploy.nlp.megatronllm_deployable) (in module nemo_deploy.nlp.megatronllm_deployable_ray) (in module nemo_deploy.nlp.trtllm_api_deployable) (in module nemo_export.tarutils) (in module nemo_export.tensorrt_llm) (in module nemo_export.tensorrt_llm_deployable_ray) (in module nemo_export.tensorrt_mm_exporter) (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) (in module nemo_export.trt_llm.tensorrt_llm_run) (in module nemo_export.utils._mock_import) (in module nemo_export.utils.model_loader) (in module nemo_export.vllm.model_loader) (in module nemo_export.vllm_exporter) logprobs (nemo_deploy.service.fastapi_interface_to_pytriton.CompletionRequest attribute) lora_manager (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMWorkerContext attribute) M mask_id (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) max_batch_size (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMHostContext attribute) (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMWorkerContext attribute) max_input_len (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMHostContext attribute) (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMWorkerContext attribute) max_tokens (nemo_deploy.service.fastapi_interface_to_pytriton.BaseRequest attribute) (nemo_deploy.service.rest_model_api.CompletionRequest attribute) MegatronLLMDeploy (class in nemo_deploy.nlp.megatronllm_deployable) MegatronLLMDeployableNemo2 (class in nemo_deploy.nlp.megatronllm_deployable) MegatronRayDeployable (class in nemo_deploy.nlp.megatronllm_deployable_ray) messages (nemo_deploy.service.fastapi_interface_to_pytriton.ChatCompletionRequest attribute) MixtralConverter (class in nemo_export.vllm.model_converters) model (nemo_deploy.service.fastapi_interface_to_pytriton.BaseRequest attribute) (nemo_deploy.service.rest_model_api.CompletionRequest attribute) ModelConverter (class in nemo_export.vllm.model_converters) ModelWorker (class in nemo_deploy.nlp.megatronllm_deployable_ray) module nemo_deploy nemo_deploy.deploy_base nemo_deploy.deploy_pytriton nemo_deploy.deploy_ray nemo_deploy.multimodal nemo_deploy.multimodal.query_multimodal nemo_deploy.nlp nemo_deploy.nlp.hf_deployable nemo_deploy.nlp.hf_deployable_ray nemo_deploy.nlp.megatronllm_deployable nemo_deploy.nlp.megatronllm_deployable_ray nemo_deploy.nlp.query_llm nemo_deploy.nlp.trtllm_api_deployable nemo_deploy.package_info nemo_deploy.ray_utils nemo_deploy.service nemo_deploy.service.fastapi_interface_to_pytriton nemo_deploy.service.rest_model_api nemo_deploy.triton_deployable nemo_deploy.utils nemo_export nemo_export.multimodal nemo_export.multimodal.build nemo_export.multimodal.run nemo_export.onnx_llm_exporter nemo_export.package_info nemo_export.sentencepiece_tokenizer nemo_export.tarutils nemo_export.tensorrt_llm nemo_export.tensorrt_llm_deployable_ray nemo_export.tensorrt_mm_exporter nemo_export.tiktoken_tokenizer nemo_export.trt_llm nemo_export.trt_llm.nemo_ckpt_loader nemo_export.trt_llm.nemo_ckpt_loader.nemo_file nemo_export.trt_llm.qnemo nemo_export.trt_llm.qnemo.qnemo_to_tensorrt_llm nemo_export.trt_llm.qnemo.utils nemo_export.trt_llm.tensorrt_llm_run nemo_export.trt_llm.utils nemo_export.utils nemo_export.utils._mock_import nemo_export.utils.constants nemo_export.utils.lora_converter nemo_export.utils.model_loader nemo_export.utils.utils nemo_export.vllm nemo_export.vllm.model_config nemo_export.vllm.model_converters nemo_export.vllm.model_loader nemo_export.vllm_exporter nemo_export.vllm_hf_exporter MultimodalModelRunner (class in nemo_export.multimodal.run) N name (nemo_export.tarutils.TarPath property) ndarray2img() (in module nemo_deploy.utils) NEMO1 (in module nemo_deploy.utils) NEMO2 (in module nemo_deploy.utils) nemo_checkpoint_version() (in module nemo_deploy.utils) nemo_deploy module nemo_deploy.deploy_base module nemo_deploy.deploy_pytriton module nemo_deploy.deploy_ray module nemo_deploy.multimodal module nemo_deploy.multimodal.query_multimodal module nemo_deploy.nlp module nemo_deploy.nlp.hf_deployable module nemo_deploy.nlp.hf_deployable_ray module nemo_deploy.nlp.megatronllm_deployable module nemo_deploy.nlp.megatronllm_deployable_ray module nemo_deploy.nlp.query_llm module nemo_deploy.nlp.trtllm_api_deployable module nemo_deploy.package_info module nemo_deploy.ray_utils module nemo_deploy.service module nemo_deploy.service.fastapi_interface_to_pytriton module nemo_deploy.service.rest_model_api module nemo_deploy.triton_deployable module nemo_deploy.utils module nemo_export module nemo_export.multimodal module nemo_export.multimodal.build module nemo_export.multimodal.run module nemo_export.onnx_llm_exporter module nemo_export.package_info module nemo_export.sentencepiece_tokenizer module nemo_export.tarutils module nemo_export.tensorrt_llm module nemo_export.tensorrt_llm_deployable_ray module nemo_export.tensorrt_mm_exporter module nemo_export.tiktoken_tokenizer module nemo_export.trt_llm module nemo_export.trt_llm.nemo_ckpt_loader module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file module nemo_export.trt_llm.qnemo module nemo_export.trt_llm.qnemo.qnemo_to_tensorrt_llm module nemo_export.trt_llm.qnemo.utils module nemo_export.trt_llm.tensorrt_llm_run module nemo_export.trt_llm.utils module nemo_export.utils module nemo_export.utils._mock_import module nemo_export.utils.constants module nemo_export.utils.lora_converter module nemo_export.utils.model_loader module nemo_export.utils.utils module nemo_export.vllm module nemo_export.vllm.model_config module nemo_export.vllm.model_converters module nemo_export.vllm.model_loader module nemo_export.vllm_exporter module nemo_export.vllm_hf_exporter module nemo_to_path() (in module nemo_export.utils.model_loader) nemo_weights_directory() (in module nemo_export.utils.model_loader) NemoModelConfig (class in nemo_export.vllm.model_config) NemoModelLoader (class in nemo_export.vllm.model_loader) NemoQueryLLM (class in nemo_deploy.nlp.query_llm) NemoQueryLLMBase (class in nemo_deploy.nlp.query_llm) NemoQueryLLMHF (class in nemo_deploy.nlp.query_llm) NemoQueryLLMPyTorch (class in nemo_deploy.nlp.query_llm) NemoQueryMultimodal (class in nemo_deploy.multimodal.query_multimodal) NemoQueryTRTLLMAPI (class in nemo_deploy.nlp.query_llm) noop_decorator() (in module nemo_export.onnx_llm_exporter) (in module nemo_export.tensorrt_mm_exporter) (in module nemo_export.vllm_exporter) O OnnxLLMExporter (class in nemo_export.onnx_llm_exporter) open() (nemo_export.tarutils.TarPath method) openai_format_response (nemo_deploy.service.rest_model_api.TritonSettings property) output_generation_logits (nemo_deploy.service.rest_model_api.TritonSettings property) P pad_id (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) (nemo_export.tiktoken_tokenizer.TiktokenTokenizer property) PATTERN_TIKTOKEN (in module nemo_export.tiktoken_tokenizer) prepare_directory_for_export() (in module nemo_export.utils.utils) prepare_input_tensors() (in module nemo_export.trt_llm.tensorrt_llm_run) preprocess() (nemo_export.multimodal.run.MultimodalModelRunner method) preprocess_frames() (nemo_export.multimodal.run.MultimodalModelRunner method) preprocess_lita_visual() (nemo_export.multimodal.run.MultimodalModelRunner method) print_result() (nemo_export.multimodal.run.MultimodalModelRunner method) process_image() (nemo_export.multimodal.run.MultimodalModelRunner method) process_lita_video() (nemo_export.multimodal.run.MultimodalModelRunner method) process_vila_img() (nemo_export.multimodal.run.MultimodalModelRunner method) prompt (nemo_deploy.service.fastapi_interface_to_pytriton.CompletionRequest attribute) (nemo_deploy.service.rest_model_api.CompletionRequest attribute) ptuning_setup() (nemo_export.multimodal.run.MultimodalModelRunner method) Q qnemo_to_tensorrt_llm() (in module nemo_export.trt_llm.qnemo.qnemo_to_tensorrt_llm) quantize() (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) query() (nemo_deploy.multimodal.query_multimodal.NemoQueryMultimodal method) query_llm() (nemo_deploy.nlp.query_llm.NemoQueryLLM method) (nemo_deploy.nlp.query_llm.NemoQueryLLMHF method) (nemo_deploy.nlp.query_llm.NemoQueryLLMPyTorch method) (nemo_deploy.nlp.query_llm.NemoQueryTRTLLMAPI method) query_llm_async() (in module nemo_deploy.service.fastapi_interface_to_pytriton) R ray_infer_fn() (nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy method) (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 method) (nemo_export.tensorrt_llm.TensorRTLLM method) reformat_module_names_to_hf() (in module nemo_export.utils.lora_converter) register_model_converter() (in module nemo_export.vllm.model_converters) reload_mergeable_ranks() (in module nemo_export.tiktoken_tokenizer) relpath (nemo_export.tarutils.TarPath property) remove_eos_token() (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 method) rename_extra_states() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) rename_qkv_keys() (in module nemo_export.utils.lora_converter) replace_number_add_offset() (in module nemo_export.utils.lora_converter) requires_bos_token() (nemo_export.vllm.model_converters.GemmaConverter method) (nemo_export.vllm.model_converters.LlamaConverter method) (nemo_export.vllm.model_converters.MixtralConverter method) (nemo_export.vllm.model_converters.ModelConverter method) rglob() (nemo_export.tarutils.TarPath method) run() (nemo_deploy.deploy_base.DeployBase method) (nemo_deploy.deploy_pytriton.DeployPyTriton method) (nemo_deploy.deploy_ray.DeployRay method) (nemo_export.multimodal.run.MultimodalModelRunner method) S sampling_config (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMWorkerContext attribute) SentencePieceTokenizer (class in nemo_export.sentencepiece_tokenizer) sep_id (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) serve() (nemo_deploy.deploy_base.DeployBase method) (nemo_deploy.deploy_pytriton.DeployPyTriton method) set_greedy_params() (nemo_deploy.service.fastapi_interface_to_pytriton.BaseRequest method) setup_fake_prompts() (nemo_export.multimodal.run.MultimodalModelRunner method) setup_fake_prompts_vila() (nemo_export.multimodal.run.MultimodalModelRunner method) setup_inputs() (nemo_export.multimodal.run.MultimodalModelRunner method) setup_media() (nemo_deploy.multimodal.query_multimodal.NemoQueryMultimodal method) SPECIAL_TOKEN_TEMPLATE (in module nemo_export.tiktoken_tokenizer) SPECIAL_TOKENS (in module nemo_export.tiktoken_tokenizer) split_prompt_by_images() (nemo_export.multimodal.run.MultimodalModelRunner method) Starcoder2Converter (class in nemo_export.vllm.model_converters) start() (nemo_deploy.deploy_ray.DeployRay method) stop (nemo_deploy.service.rest_model_api.CompletionRequest attribute) stop() (nemo_deploy.deploy_base.DeployBase method) (nemo_deploy.deploy_pytriton.DeployPyTriton method) (nemo_deploy.deploy_ray.DeployRay method) str_list2numpy() (in module nemo_deploy.utils) str_ndarray2list() (in module nemo_deploy.utils) str_to_dict() (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 method) stream (nemo_deploy.service.rest_model_api.CompletionRequest attribute) suffix (nemo_export.tarutils.TarPath property) SUPPORTED_TASKS (in module nemo_deploy.nlp.hf_deployable) T TarFileSystemReader (class in nemo_export.utils.model_loader) tarobject (nemo_export.tarutils.TarPath property) TarPath (class in nemo_export.tarutils) temperature (nemo_deploy.service.fastapi_interface_to_pytriton.BaseRequest attribute) (nemo_deploy.service.rest_model_api.CompletionRequest attribute) tensorrt_llm_worker_context (in module nemo_export.trt_llm.tensorrt_llm_run) TensorRTLLM (class in nemo_export.tensorrt_llm) TensorRTLLMAPIDeployable (class in nemo_deploy.nlp.trtllm_api_deployable) TensorrtLLMHostContext (class in nemo_export.trt_llm.tensorrt_llm_run) TensorRTLLMRayDeployable (class in nemo_export.tensorrt_llm_deployable_ray) TensorrtLLMWorkerContext (class in nemo_export.trt_llm.tensorrt_llm_run) TensorRTMMExporter (class in nemo_export.tensorrt_mm_exporter) text_to_tokens() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) TiktokenTokenizer (class in nemo_export.tiktoken_tokenizer) to_word_list_format() (in module nemo_export.trt_llm.tensorrt_llm_run) token_to_id() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) tokenizer (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMHostContext attribute) tokenizer_image_token() (nemo_export.multimodal.run.MultimodalModelRunner static method) tokens_to_ids() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) tokens_to_text() (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer method) top_k (nemo_deploy.service.fastapi_interface_to_pytriton.BaseRequest attribute) (nemo_deploy.service.rest_model_api.CompletionRequest attribute) top_p (nemo_deploy.service.fastapi_interface_to_pytriton.BaseRequest attribute) (nemo_deploy.service.rest_model_api.CompletionRequest attribute) torch_dtype_from_precision() (in module nemo_export.utils.utils) triton_infer_fn() (nemo_deploy.nlp.hf_deployable.HuggingFaceLLMDeploy method) (nemo_deploy.nlp.megatronllm_deployable.MegatronLLMDeployableNemo2 method) (nemo_deploy.nlp.trtllm_api_deployable.TensorRTLLMAPIDeployable method) (nemo_deploy.triton_deployable.ITritonDeployable method) (nemo_export.onnx_llm_exporter.OnnxLLMExporter method) (nemo_export.tensorrt_llm.TensorRTLLM method) (nemo_export.tensorrt_mm_exporter.TensorRTMMExporter method) (nemo_export.vllm_exporter.vLLMExporter method) (nemo_export.vllm_hf_exporter.vLLMHFExporter method) triton_infer_fn_streaming() (nemo_export.vllm_exporter.vLLMExporter method) triton_request_timeout (nemo_deploy.service.rest_model_api.TritonSettings property) triton_service_ip (nemo_deploy.service.fastapi_interface_to_pytriton.TritonSettings property) (nemo_deploy.service.rest_model_api.TritonSettings property) triton_service_port (nemo_deploy.service.fastapi_interface_to_pytriton.TritonSettings property) (nemo_deploy.service.rest_model_api.TritonSettings property) triton_settings (in module nemo_deploy.service.fastapi_interface_to_pytriton) (in module nemo_deploy.service.rest_model_api) TritonSettings (class in nemo_deploy.service.fastapi_interface_to_pytriton) (class in nemo_deploy.service.rest_model_api) trt_dtype_to_torch() (in module nemo_export.multimodal.run) TRTLLM_ENGINE_DIR (in module nemo_export.utils.constants) try_get_generation_config() (nemo_export.vllm.model_config.NemoModelConfig method) typedict2tensor() (in module nemo_deploy.utils) U unk_id (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) unload() (in module nemo_export.trt_llm.tensorrt_llm_run) unload_engine() (in module nemo_export.trt_llm.tensorrt_llm_run) (nemo_export.tensorrt_llm.TensorRTLLM method) update_tokenizer_paths() (in module nemo_export.trt_llm.nemo_ckpt_loader.nemo_file) use_deploy (in module nemo_export.tensorrt_mm_exporter) use_onnxruntime (in module nemo_export.onnx_llm_exporter) use_pytriton (in module nemo_export.onnx_llm_exporter) (in module nemo_export.tensorrt_mm_exporter) (in module nemo_export.vllm_exporter) V validate_fp8_network() (in module nemo_export.utils.utils) video_preprocess() (nemo_export.multimodal.run.MultimodalModelRunner method) vLLMExporter (class in nemo_export.vllm_exporter) vLLMHFExporter (class in nemo_export.vllm_hf_exporter) vocab (nemo_export.sentencepiece_tokenizer.SentencePieceTokenizer property) W WEIGHTS_NAME (in module nemo_export.trt_llm.qnemo.utils) world_size (nemo_export.trt_llm.tensorrt_llm_run.TensorrtLLMHostContext attribute) Z ZarrPathStore (class in nemo_export.tarutils)