# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""BEIR Evaluator — Designer component for running BEIR evaluation against LanceDB.
Reuses the existing evaluation logic from ``nemo_retriever.recall.beir`` and
prints the standard run summary via
``nemo_retriever.utils.detection_summary.print_run_summary``.
"""
from __future__ import annotations
import logging
import time
from pathlib import Path
from typing import Annotated, Any, Optional
from nemo_retriever.graph.designer import Param, designer_component
logger = logging.getLogger(__name__)
@designer_component(
    name="BEIR Evaluator",
    category="Evaluation",
    compute="cpu",
    description="Runs BEIR evaluation against a LanceDB table and prints the standard run summary",
    category_color="#42d6a4",
    component_type="pipeline_evaluator",
)
class BEIREvaluatorActor:
    """Designer BEIR evaluation node against an existing LanceDB table.

    Assumes vectors were already written (for example via
    :class:`~nemo_retriever.vdb.operators.IngestVdbOperator` or the ``retriever
    pipeline`` upload path). After evaluation, calls ``print_run_summary`` like
    the batch pipeline.

    The constructor only stores configuration; the evaluation itself runs
    when :meth:`evaluate` is called.
    """

    def __init__(
        self,
        lancedb_uri: Annotated[str, Param(label="LanceDB URI", placeholder="/path/to/lancedb")] = "lancedb",
        lancedb_table: Annotated[str, Param(label="Table Name")] = "nv-ingest",
        embedding_model: Annotated[str, Param(label="Embedding Model")] = "nvidia/llama-nemotron-embed-1b-v2",
        beir_loader: Annotated[str, Param(label="BEIR Loader", choices=["vidore_hf"])] = "vidore_hf",
        beir_dataset_name: Annotated[
            str, Param(label="BEIR Dataset Name", placeholder="e.g. vidore_v3_computer_science")
        ] = "",
        beir_split: Annotated[str, Param(label="BEIR Split")] = "test",
        beir_query_language: Annotated[str, Param(label="Query Language", placeholder="Optional (e.g. en, fr)")] = "",
        beir_doc_id_field: Annotated[
            str,
            Param(label="Doc ID Field", choices=["pdf_basename", "pdf_page", "source_id", "path"]),
        ] = "pdf_basename",
        beir_ks: Annotated[str, Param(label="K Values", placeholder="1,3,5,10")] = "1,3,5,10",
        hybrid: Annotated[bool, Param(label="Hybrid Search")] = False,
    ) -> None:
        """Store the evaluation configuration.

        Args:
            lancedb_uri: Location of the LanceDB database. A local path is
                expanded (``~``) and resolved to an absolute path; a value
                containing ``://`` is stored unchanged.
            lancedb_table: Name of the table to evaluate against.
            embedding_model: Embedding model name; it is passed through
                ``resolve_embed_model`` when :meth:`evaluate` runs.
            beir_loader: Loader identifier forwarded to ``BeirConfig``.
            beir_dataset_name: Dataset name forwarded to ``BeirConfig``.
            beir_split: Dataset split forwarded to ``BeirConfig``.
            beir_query_language: Optional query language; an empty string is
                stored as ``None``.
            beir_doc_id_field: Document id field forwarded to ``BeirConfig``.
            beir_ks: Comma-separated integers (blank entries are ignored), or
                an iterable of values. When no value remains, ``(1, 3, 5, 10)``
                is used.
            hybrid: Hybrid-search flag forwarded to ``BeirConfig``.

        Raises:
            ValueError: If ``beir_ks`` is a string containing a non-integer
                entry.
        """
        uri_text = str(lancedb_uri)
        if "://" in uri_text:
            # Path() would collapse "scheme://" into "scheme:/" and resolve()
            # would then anchor it under the current directory, so a
            # scheme-qualified URI is kept exactly as given.
            self.lancedb_uri = uri_text
        else:
            self.lancedb_uri = str(Path(lancedb_uri).expanduser().resolve())

        self.lancedb_table = lancedb_table
        self.embedding_model = embedding_model
        self.beir_loader = beir_loader
        self.beir_dataset_name = beir_dataset_name
        self.beir_split = beir_split
        # The UI supplies "" when no language is chosen; store None instead.
        self.beir_query_language = beir_query_language or None
        self.beir_doc_id_field = beir_doc_id_field
        self.hybrid = hybrid

        if isinstance(beir_ks, str):
            tokens = (token.strip() for token in beir_ks.split(","))
            try:
                parsed_ks = tuple(int(token) for token in tokens if token)
            except ValueError as err:
                # Same exception type as the bare int() failure, but the
                # message points at the parameter that was malformed.
                raise ValueError(
                    f"beir_ks must be a comma-separated list of integers, got {beir_ks!r}"
                ) from err
        else:
            # Non-string input is taken as an iterable of cut-offs as-is.
            parsed_ks = tuple(beir_ks)
        self._ks: tuple[int, ...] = parsed_ks or (1, 3, 5, 10)

    def evaluate(self) -> dict[str, Any]:
        """Run the configured evaluation and print the standard run summary.

        Builds a ``BeirConfig`` from the stored settings, times the call to
        ``evaluate_lancedb_beir`` and forwards the metrics, elapsed time and
        query count to ``print_run_summary``.

        Returns:
            The ``summary_dict`` produced by ``print_run_summary``.
        """
        # These imports are deferred until the evaluation is actually run.
        from nemo_retriever.model import resolve_embed_model
        from nemo_retriever.recall.beir import BeirConfig, evaluate_lancedb_beir
        from nemo_retriever.utils.detection_summary import print_run_summary

        beir_cfg = BeirConfig(
            lancedb_uri=self.lancedb_uri,
            lancedb_table=self.lancedb_table,
            embedding_model=resolve_embed_model(self.embedding_model),
            loader=self.beir_loader,
            dataset_name=self.beir_dataset_name,
            split=self.beir_split,
            query_language=self.beir_query_language,
            doc_id_field=self.beir_doc_id_field,
            ks=self._ks,
            hybrid=self.hybrid,
        )

        # Only the evaluation call itself is timed.
        eval_start = time.perf_counter()
        beir_dataset, _raw_hits, _run, evaluation_metrics = evaluate_lancedb_beir(beir_cfg)
        evaluation_total_time = time.perf_counter() - eval_start

        # This node performs no ingestion, so the ingestion-side fields are
        # passed as -1. NOTE(review): presumably print_run_summary treats -1
        # as "not measured" -- confirm against its implementation.
        return print_run_summary(
            processed_pages=-1,
            input_path=Path(self.lancedb_uri),
            vdb_op="lancedb",
            vdb_kwargs={"uri": self.lancedb_uri, "table_name": self.lancedb_table, "hybrid": self.hybrid},
            total_time=-1,
            ingest_only_total_time=-1,
            ray_dataset_download_total_time=-1,
            vdb_upload_total_time=-1,
            evaluation_total_time=evaluation_total_time,
            evaluation_metrics=evaluation_metrics,
            recall_total_time=0.0,
            recall_metrics={},
            evaluation_label="BEIR",
            evaluation_count=len(beir_dataset.query_ids),
        )