Source code for nemo_rl.models.megatron.community_import
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from transformers import AutoConfig
[docs]
def import_model_from_hf_name(hf_model_name: str, output_path: str):
    """Import a Hugging Face model with the matching ``nemo.tron.converter`` importer.

    The Hugging Face config for ``hf_model_name`` is loaded with
    ``trust_remote_code=True`` and its ``model_type`` decides which importer
    class is instantiated and applied. Afterwards the Megatron-core rerun
    state machine is destroyed.

    Args:
        hf_model_name: Model identifier given to ``AutoConfig.from_pretrained``
            and forwarded to the importer as its first positional argument.
        output_path: Forwarded to the importer as ``output_path``.

    Raises:
        ValueError: If ``model_type`` is not one of ``llama``, ``qwen2``,
            ``qwen3``, ``qwen3_moe``, ``deepseek_v2`` or ``deepseek_v3``.
    """
    model_type = AutoConfig.from_pretrained(
        hf_model_name, trust_remote_code=True
    ).model_type

    # Converter modules are imported inside the branch so that only the
    # module for the selected model family is loaded.
    if model_type == "llama":
        from nemo.tron.converter.llama import HFLlamaImporter as importer_cls
    elif model_type == "qwen2":
        from nemo.tron.converter.qwen import HFQwen2Importer as importer_cls
    elif model_type in ("qwen3", "qwen3_moe"):
        from nemo.tron.converter.qwen import HFQwen3Importer as importer_cls
    elif model_type in ("deepseek_v2", "deepseek_v3"):
        from nemo.tron.converter.deepseek import (
            HFDeepSeekImporter as importer_cls,
        )
    else:
        raise ValueError(
            f"Unknown model type: {model_type}. Currently, DeepSeek, Qwen and Llama are supported. "
            "If you'd like to run with a different model, please raise an issue or consider adding your own converter."
        )

    # Announce only after the converter import succeeded, then build and run.
    print(f"Importing model {hf_model_name} to {output_path}...")
    importer_cls(hf_model_name, output_path=output_path).apply()

    # resetting mcore state
    from megatron.core.rerun_state_machine import destroy_rerun_state_machine

    destroy_rerun_state_machine()
[docs]
def export_model_from_megatron(
    hf_model_name: str,
    input_path: str,
    output_path: str,
    hf_tokenizer_path: str,
    overwrite: bool = False,
):
    """Export a model with the ``nemo.tron.converter`` exporter matching its type.

    The Hugging Face config for ``hf_model_name`` is loaded with
    ``trust_remote_code=True`` and its ``model_type`` selects the exporter
    class, which is then instantiated and applied. Afterwards the
    Megatron-core rerun state machine is destroyed.

    Args:
        hf_model_name: Model identifier given to ``AutoConfig.from_pretrained``.
        input_path: Forwarded to the exporter as ``input_path``.
        output_path: Forwarded to the exporter as ``output_path``; must not
            already exist unless ``overwrite`` is true.
        hf_tokenizer_path: Forwarded to the exporter as ``hf_tokenizer_path``.
        overwrite: When true, skip the error for an already existing
            ``output_path``.

    Raises:
        FileExistsError: If ``output_path`` exists and ``overwrite`` is false.
        ValueError: If ``model_type`` is neither ``llama`` nor ``qwen2``.
    """
    # The existence check runs first, before any config is loaded.
    destination_exists = os.path.exists(output_path)
    if destination_exists and not overwrite:
        raise FileExistsError(
            f"HF checkpoint already exists at {output_path}. Delete it to run or set overwrite=True."
        )

    model_type = AutoConfig.from_pretrained(
        hf_model_name, trust_remote_code=True
    ).model_type
    if model_type not in ("llama", "qwen2"):
        raise ValueError(
            f"Unknown model: {hf_model_name}. Currently, only Qwen2 and Llama are supported. "
            "If you'd like to run with a different model, please raise an issue or consider adding your own converter."
        )

    # Converter modules are imported inside the branch so that only the
    # module for the selected model family is loaded.
    if model_type == "llama":
        from nemo.tron.converter.llama import HFLlamaExporter as exporter_cls
    else:
        from nemo.tron.converter.qwen import HFQwen2Exporter as exporter_cls

    print(f"Exporting model {hf_model_name} to {output_path}...")
    exporter_cls(
        input_path=input_path,
        output_path=output_path,
        hf_tokenizer_path=hf_tokenizer_path,
    ).apply()

    # resetting mcore state
    from megatron.core.rerun_state_machine import destroy_rerun_state_machine

    destroy_rerun_state_machine()