# Source code for nemo_rl.models.megatron.community_import

# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from transformers import AutoConfig


def import_model_from_hf_name(hf_model_name: str, output_path: str):
    """Import a Hugging Face checkpoint into a Megatron checkpoint.

    Downloads/reads the HF config for ``hf_model_name``, picks the matching
    NeMo converter based on ``hf_config.model_type``, and writes the converted
    checkpoint to ``output_path``.

    Args:
        hf_model_name: Hugging Face model name or local path.
        output_path: Destination directory for the Megatron checkpoint.

    Raises:
        ValueError: If the model type has no registered converter.
    """
    hf_config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True)

    # Select the converter class per model type. Converter modules are imported
    # lazily inside each branch so only the required nemo.tron module loads.
    if hf_config.model_type == "llama":
        from nemo.tron.converter.llama import HFLlamaImporter

        importer_cls = HFLlamaImporter
    elif hf_config.model_type == "qwen2":
        from nemo.tron.converter.qwen import HFQwen2Importer

        importer_cls = HFQwen2Importer
    elif hf_config.model_type in ("qwen3", "qwen3_moe"):
        from nemo.tron.converter.qwen import HFQwen3Importer

        importer_cls = HFQwen3Importer
    elif hf_config.model_type in ("deepseek_v2", "deepseek_v3"):
        from nemo.tron.converter.deepseek import HFDeepSeekImporter

        importer_cls = HFDeepSeekImporter
    else:
        raise ValueError(
            f"Unknown model type: {hf_config.model_type}. Currently, DeepSeek, Qwen and Llama are supported. "
            "If you'd like to run with a different model, please raise an issue or consider adding your own converter."
        )

    print(f"Importing model {hf_model_name} to {output_path}...")
    importer = importer_cls(
        hf_model_name,
        output_path=output_path,
    )
    importer.apply()

    # resetting mcore state
    import megatron.core.rerun_state_machine

    megatron.core.rerun_state_machine.destroy_rerun_state_machine()
def export_model_from_megatron(
    hf_model_name: str,
    input_path: str,
    output_path: str,
    hf_tokenizer_path: str,
    overwrite: bool = False,
):
    """Export a Megatron checkpoint back to Hugging Face format.

    Args:
        hf_model_name: HF model name or path used to resolve the model type.
        input_path: Megatron checkpoint directory to read from.
        output_path: Destination directory for the HF checkpoint.
        hf_tokenizer_path: Path of the HF tokenizer to bundle with the export.
        overwrite: When False, refuse to clobber an existing ``output_path``.

    Raises:
        FileExistsError: If ``output_path`` exists and ``overwrite`` is False.
        ValueError: If the model type has no registered exporter.
    """
    # Guard against accidentally overwriting a previous export.
    if os.path.exists(output_path) and not overwrite:
        raise FileExistsError(
            f"HF checkpoint already exists at {output_path}. Delete it to run or set overwrite=True."
        )

    hf_config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True)

    # Lazily import only the exporter that matches the model type.
    if hf_config.model_type == "llama":
        from nemo.tron.converter.llama import HFLlamaExporter as exporter_cls
    elif hf_config.model_type == "qwen2":
        from nemo.tron.converter.qwen import HFQwen2Exporter as exporter_cls
    else:
        raise ValueError(
            f"Unknown model: {hf_model_name}. Currently, only Qwen2 and Llama are supported. "
            "If you'd like to run with a different model, please raise an issue or consider adding your own converter."
        )

    print(f"Exporting model {hf_model_name} to {output_path}...")
    exporter_cls(
        input_path=input_path,
        output_path=output_path,
        hf_tokenizer_path=hf_tokenizer_path,
    ).apply()

    # resetting mcore state
    import megatron.core.rerun_state_machine

    megatron.core.rerun_state_machine.destroy_rerun_state_machine()