Source code for nemo_retriever.utils.hf_model_registry

# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Central registry of pinned HuggingFace model revisions.

Every ``from_pretrained`` call in the codebase should pass
``revision=get_hf_revision(model_id)`` and direct ``hf_hub_download`` calls
should use ``hf_hub_download_with_pinned_revision`` so that we always
download an exact, immutable snapshot rather than tracking the mutable
``main`` branch.

To bump a model version, update the corresponding SHA in
``HF_MODEL_REVISIONS`` and re-test.
"""

from __future__ import annotations

import logging
import os
from typing import Any

logger = logging.getLogger(__name__)

# Registry of pinned revisions, keyed by HuggingFace repo id ("<org>/<name>").
# Each value is the commit SHA that ``get_hf_revision`` returns for that repo.
# To move a model to a newer snapshot, replace its SHA here and re-test.
HF_MODEL_REVISIONS: dict[str, str] = {
    "nvidia/llama-3.2-nv-embedqa-1b-v2": "cefc2394cc541737b7867df197984cf23f05367f",
    "nvidia/llama-nemotron-embed-1b-v2": "b4caa8456edd360b3b4e938d94ed4398dd437fad",
    "nvidia/parakeet-ctc-1.1b": "a707e818195cb97c8f7da2fc36b221a29f69a5db",
    "nvidia/NVIDIA-Nemotron-Parse-v1.2": "f42c8040b12ee64370922d108778ab655b722c5d",
    "nvidia/llama-nemotron-embed-vl-1b-v2": "4ef1bfa6da3a909de6bd00611950b7ed99203117",
    "meta-llama/Llama-3.2-1B": "4e20de362430cd3b72f300e6b0f18e50e7166e08",
    "intfloat/e5-large-unsupervised": "15af9288f69a6291f37bfb89b47e71abc747b206",
    "nvidia/llama-nemotron-rerank-1b-v2": "8fd3e5d962d44cfe65d4ba0784eebed44cf136b0",
    "nvidia/llama-nemotron-rerank-vl-1b-v2": "edc083f4b3a433d65287cbca916759c9f88fa887",
    "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16": "5d250e2e111dc5e1434131bdf3d590c27a878ade",
    "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-FP8": "7394488badb786e1decc0e00e308de1cab9560e6",
    "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-NVFP4-QAD": "b8d3c170d9ee3a078917ef9bfd508eff988d6de7",
    "nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-BF16": "24e67ea000b7c2837fc8f9488aa2008524fac8ba",
    "nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-FP8": "6647b845a4b786c6e2c7adb1b6a909e1aa71fac2",
    "nvidia/Nemotron-3-Nano-Omni-30B-A3B-Reasoning-NVFP4": "dc5f0b0bfddf8b6e0f5891475be9af05b80126fe",
    "nvidia/nemotron-ocr-v1": "8657d08d3279f4864002d5fd3fdcd47ad8c96bcb",
    "nvidia/nemotron-ocr-v2": "86cacb0467fa4f7ce54342fdb250825e0d928ae7",
    "nvidia/nemotron-page-elements-v3": "df62dbb631502575ac4d43b44d700b1674ab1d56",
    "nvidia/nemotron-table-structure-v1": "9350162faa1110320af62699105780b0c87b73ad",
    "nvidia/nemotron-graphic-elements-v1": "4a76546bb1bb4cbab3401361c91cf01706321805",
}


def get_hf_revision(model_id: str, *, strict: bool = True) -> str | None:
    """Look up the commit SHA pinned for *model_id* in ``HF_MODEL_REVISIONS``.

    Parameters
    ----------
    model_id:
        HuggingFace model identifier (e.g. ``"nvidia/parakeet-ctc-1.1b"``).
    strict:
        Controls what happens when *model_id* is not registered.  ``True``
        (the default) raises ``ValueError``.  ``False`` logs a warning and
        returns ``None`` so that ``from_pretrained`` falls back to the
        ``main`` branch.

    Returns
    -------
    str | None
        The pinned revision, or ``None`` for an unregistered model when
        ``strict`` is ``False``.

    Raises
    ------
    ValueError
        If *model_id* has no pinned revision and ``strict`` is ``True``.
    """
    pinned = HF_MODEL_REVISIONS.get(model_id)

    if pinned is None:
        # Registry miss: fail loudly by default, otherwise warn and let the
        # ``None`` fall through to the single return below.
        problem = (
            f"No pinned HuggingFace revision for model '{model_id}'. "
            "Add an entry to HF_MODEL_REVISIONS in hf_model_registry.py to pin it."
        )
        if strict:
            raise ValueError(problem)
        logger.warning(problem + " Falling back to the default (main) branch.")

    return pinned
def hf_hub_download(*args: Any, **kwargs: Any) -> str:
    """Forward to ``huggingface_hub.hf_hub_download``, importing it on first use.

    The import lives inside the function so that merely importing this
    registry module does not require ``huggingface_hub`` to be importable.

    Parameters
    ----------
    *args, **kwargs:
        Passed through unchanged to ``huggingface_hub.hf_hub_download``.

    Returns
    -------
    str
        Whatever the upstream downloader returns.
    """
    import huggingface_hub

    return huggingface_hub.hf_hub_download(*args, **kwargs)
def _is_huggingface_hub_error(exc: Exception) -> bool:
    """Tell whether *exc*'s class is defined in a ``huggingface_hub`` submodule.

    The check is purely name based: the ``__module__`` of the exception's
    class must start with ``"huggingface_hub."`` (note the trailing dot).
    """
    defining_module = exc.__class__.__module__
    return defining_module.startswith("huggingface_hub.")


def _hf_download_error_context(repo_id: Any, revision: Any, filename: Any) -> str:
    """Build the diagnostic message attached to a failed Hub download.

    Parameters
    ----------
    repo_id, revision, filename:
        Identifiers of the asset that could not be resolved; each is rendered
        with ``repr`` in the message.

    Returns
    -------
    str
        A single-line message naming the asset, the current value of
        ``HF_HUB_OFFLINE`` (``"0"`` when unset), whether a token environment
        variable is populated, and remediation hints.
    """
    env = os.environ
    # Only presence is reported; the token value itself never enters the message.
    has_token = bool(env.get("HF_TOKEN") or env.get("HUGGING_FACE_HUB_TOKEN"))
    token_state = "set" if has_token else "unset"
    offline_flag = env.get("HF_HUB_OFFLINE", "0")

    fragments = [
        "Failed to resolve Hugging Face asset ",
        f"repo_id={repo_id!r}, revision={revision!r}, filename={filename!r}. ",
        f"HF_HUB_OFFLINE={offline_flag}; HF token is {token_state}. ",
        "For offline startup, pre-cache this exact revision and set HF_HUB_OFFLINE=1. ",
        "For online startup, set HF_TOKEN/HUGGING_FACE_HUB_TOKEN if the repo requires auth or the cluster is hitting ",
        "anonymous Hub rate limits.",
    ]
    return "".join(fragments)
def hf_hub_download_with_pinned_revision(*args: Any, **kwargs: Any) -> str:
    """Download via ``hf_hub_download``, injecting the registry pin if available.

    Parameters
    ----------
    *args:
        Positional arguments forwarded to ``hf_hub_download``.  The first one,
        when present, is taken as ``repo_id`` and the second as ``filename``
        (the latter only for error reporting).
    **kwargs:
        Keyword arguments forwarded to ``hf_hub_download``.  When ``revision``
        is missing or ``None`` and the repo has a registered pin, the pinned
        revision is added before the call.

    Returns
    -------
    str
        The local path returned by the underlying downloader.

    Raises
    ------
    RuntimeError
        If the download raises an exception recognised as coming from
        ``huggingface_hub``; the original exception is chained as the cause.
        Any other exception propagates unchanged.
    """
    # The keyword form wins; the positional slot is only a fallback.
    repo_id = kwargs.get("repo_id")
    if repo_id is None:
        repo_id = args[0] if args else None

    caller_left_revision_open = kwargs.get("revision") is None
    if repo_id is not None and caller_left_revision_open:
        # Non-strict lookup: an unregistered repo only triggers a warning.
        pinned = get_hf_revision(str(repo_id), strict=False)
        if pinned is not None:
            kwargs["revision"] = pinned

    try:
        return hf_hub_download(*args, **kwargs)
    except Exception as exc:
        if not _is_huggingface_hub_error(exc):
            raise

        filename = kwargs.get("filename")
        if filename is None and len(args) > 1:
            filename = args[1]
        context = _hf_download_error_context(repo_id, kwargs.get("revision"), filename)
        raise RuntimeError(context) from exc
def install_pinned_hf_hub_download(module: Any) -> None:
    """Swap a module's top-level ``hf_hub_download`` for the pin-aware wrapper.

    Parameters
    ----------
    module:
        An imported module object expected to expose ``hf_hub_download`` as an
        attribute.  When the attribute is missing, a warning is logged and the
        module is left untouched.

    Returns
    -------
    None
        On success the module is mutated in place.
    """
    if not hasattr(module, "hf_hub_download"):
        logger.warning(
            "install_pinned_hf_hub_download: module %r has no 'hf_hub_download' attribute; "
            "revision pinning was NOT applied.",
            getattr(module, "__name__", module),
        )
        return

    # TODO: Move this pinning into the extraction package nightly build/publish
    # once those packages expose revision-aware downloads natively.
    module.hf_hub_download = hf_hub_download_with_pinned_revision