mteb#
This page contains all evaluation tasks for the mteb harness.
Task |
Description |
|---|---|
MMTEB |
|
MTEB |
|
MTEB_NL_RETRIEVAL |
|
MTEB Visual Document Retrieval benchmark |
|
RTEB |
|
ViDoReV1 |
|
ViDoReV2 |
|
ViDoReV3 |
|
ViDoReV3 Text (text_image markdown only) |
|
ViDoReV3 Text+Image (text_image markdown + images) |
|
Custom BEIR-formatted text retrieval benchmark |
|
Financial Opinion Mining and Question Answering |
|
HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong supervision for supporting facts to enable more explainable question answering systems. |
|
MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval dataset that focuses on search across 18 different languages. |
|
MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval dataset that focuses on search across 18 different languages. |
|
MLDR |
|
MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance. |
|
NanoFiQA2018 is a smaller subset of the Financial Opinion Mining and Question Answering dataset. |
|
Natural Questions (NQ) contains real user questions issued to Google search, and answers found from Wikipedia by annotators. NQ is designed for the training and evaluation of automatic question answering systems. |
MMTEB#
MMTEB
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: MMTEB
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the MMTEB task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: MTEB(Multilingual, v2)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: MMTEB
target:
  api_endpoint: {}
MTEB#
MTEB
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: MTEB
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the MTEB task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: MTEB(eng, v2)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: MTEB
target:
  api_endpoint: {}
MTEB_NL_RETRIEVAL#
MTEB_NL_RETRIEVAL
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: MTEB_NL_RETRIEVAL
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the MTEB_NL_RETRIEVAL task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: MTEB(nld, v1, retrieval)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: MTEB_NL_RETRIEVAL
target:
  api_endpoint: {}
MTEB_VDR#
MTEB Visual Document Retrieval benchmark
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: MTEB_VDR
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the MTEB_VDR task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: VisualDocumentRetrieval
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: MTEB_VDR
target:
  api_endpoint: {}
RTEB#
RTEB
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: RTEB
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the RTEB task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: RTEB(beta)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: RTEB
target:
  api_endpoint: {}
ViDoReV1#
ViDoReV1
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: ViDoReV1
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the ViDoReV1 task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: ViDoRe(v1)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: ViDoReV1
target:
  api_endpoint: {}
ViDoReV2#
ViDoReV2
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: ViDoReV2
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the ViDoReV2 task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: ViDoRe(v2)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: ViDoReV2
target:
  api_endpoint: {}
ViDoReV3#
ViDoReV3
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: ViDoReV3
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the ViDoReV3 task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: ViDoRe(v3)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: ViDoReV3
target:
  api_endpoint: {}
ViDoReV3_Text#
ViDoReV3 Text (text_image markdown only)
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: ViDoReV3_Text
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the ViDoReV3_Text task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: ViDoRe(v3, Text)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: ViDoReV3_Text
target:
  api_endpoint: {}
ViDoReV3_Text_Image#
ViDoReV3 Text+Image (text_image markdown + images)
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: ViDoReV3_Text_Image
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the ViDoReV3_Text_Image task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: ViDoRe(v3, Text+Image)
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: ViDoReV3_Text_Image
target:
  api_endpoint: {}
custom_beir_task#
Custom BEIR-formatted text retrieval benchmark
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: custom_beir_task
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Default configuration for the custom_beir_task task of the mteb harness.
# Nesting follows the dotted paths read by the command template
# (config.params.*, config.params.extra.*, config.params.extra.ranker.*).
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # passed to the CLI as --async_limit
    task: custom_beir_task
    request_timeout: 300
    extra:
      query_prompt_template: null
      document_prompt_template: null
      # --ranker_* flags are emitted only when ranker.model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null
      cache_path: null
      args: null
      version_lite: null
      language: null
  # NOTE(review): supported_endpoint_types and type are assumed to belong
  # under config; the template does not reference them - confirm.
  supported_endpoint_types:
    - embedding
  type: custom_beir_task
target:
  api_endpoint: {}
fiqa#
Financial Opinion Mining and Question Answering
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: fiqa
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Defaults for the `fiqa` task. The command template reads these values through
# `config.params.*`, `config.params.extra.*` and `target.api_endpoint.*`, so the
# nesting below is significant.
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # rendered as --async_limit
    task: FiQA2018  # rendered as --task
    request_timeout: 300
    extra:
      # Prompt templates are passed to mteb only when set.
      query_prompt_template: null
      document_prompt_template: null
      # The --ranker_* flags are emitted only when model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null  # env-var name; exported as RANKER_API_TOKEN=$<name>
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null  # exported as MTEB_INTERNAL_DATASET_PATH when set
      cache_path: null
      args: null  # appended verbatim to the mteb command when set
      version_lite: null
      language: null  # rendered as --langs when set
  # NOTE(review): placement of the next two keys under `config` is reconstructed,
  # not shown by the command template; confirm against the harness schema.
  supported_endpoint_types:
    - embedding
  type: fiqa
target:
  api_endpoint: {}
hotpotqa#
HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong supervision for supporting facts to enable more explainable question answering systems.
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: hotpotqa
{#
  Shell command template for this task.
  - Optional `export ... &&` prefixes are rendered only when the matching setting is not none:
    API_TOKEN=$<target.api_endpoint.api_key_name>, MTEB_INTERNAL_DATASET_PATH=<extra.dataset_path>,
    RANKER_API_TOKEN=$<extra.ranker.api_key>. The two token settings are rendered after a `$`,
    i.e. they are used as environment-variable names, not as the secret values themselves.
  - `mteb` always receives the encoder name/URL, task, workdir, batch size, async limit
    (taken from `config.params.parallelism`), max retries, request timeout, --truncate and --top_k.
  - --cache_path, the raw `extra.args` string, --langs, the two prompt templates, --version_lite
    and --eval_split are appended only when their setting is not none; the three --ranker_* flags
    are gated on `extra.ranker.model_id` alone.
  The closing `-#}` strips the newline after this comment so the rendered command is unchanged.
-#}
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Defaults for the `hotpotqa` task. The command template reads these values
# through `config.params.*`, `config.params.extra.*` and `target.api_endpoint.*`,
# so the nesting below is significant.
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # rendered as --async_limit
    task: HotpotQA  # rendered as --task
    request_timeout: 300
    extra:
      # Prompt templates are passed to mteb only when set.
      query_prompt_template: null
      document_prompt_template: null
      # The --ranker_* flags are emitted only when model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null  # env-var name; exported as RANKER_API_TOKEN=$<name>
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null  # exported as MTEB_INTERNAL_DATASET_PATH when set
      cache_path: null
      args: null  # appended verbatim to the mteb command when set
      version_lite: null
      language: null  # rendered as --langs when set
  # NOTE(review): placement of the next two keys under `config` is reconstructed,
  # not shown by the command template; confirm against the harness schema.
  supported_endpoint_types:
    - embedding
  type: hotpotqa
target:
  api_endpoint: {}
miracl#
MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval dataset that focuses on search across 18 different languages.
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: miracl
{#
  Shell command template for this task.
  - Optional `export ... &&` prefixes are rendered only when the matching setting is not none:
    API_TOKEN=$<target.api_endpoint.api_key_name>, MTEB_INTERNAL_DATASET_PATH=<extra.dataset_path>,
    RANKER_API_TOKEN=$<extra.ranker.api_key>. The two token settings are rendered after a `$`,
    i.e. they are used as environment-variable names, not as the secret values themselves.
  - `mteb` always receives the encoder name/URL, task, workdir, batch size, async limit
    (taken from `config.params.parallelism`), max retries, request timeout, --truncate and --top_k.
  - --cache_path, the raw `extra.args` string, --langs, the two prompt templates, --version_lite
    and --eval_split are appended only when their setting is not none; the three --ranker_* flags
    are gated on `extra.ranker.model_id` alone.
  The closing `-#}` strips the newline after this comment so the rendered command is unchanged.
-#}
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Defaults for the `miracl` task. The command template reads these values
# through `config.params.*`, `config.params.extra.*` and `target.api_endpoint.*`,
# so the nesting below is significant.
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # rendered as --async_limit
    task: MIRACLRetrieval  # rendered as --task
    request_timeout: 300
    extra:
      # Prompt templates are passed to mteb only when set.
      query_prompt_template: null
      document_prompt_template: null
      # The --ranker_* flags are emitted only when model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null  # env-var name; exported as RANKER_API_TOKEN=$<name>
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null  # exported as MTEB_INTERNAL_DATASET_PATH when set
      cache_path: null
      args: null  # appended verbatim to the mteb command when set
      version_lite: null
      language: null  # rendered as --langs when set
  # NOTE(review): placement of the next two keys under `config` is reconstructed,
  # not shown by the command template; confirm against the harness schema.
  supported_endpoint_types:
    - embedding
  type: miracl
target:
  api_endpoint: {}
miracl_lite#
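MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval dataset that focuses on search across 18 different languages. This entry runs the same `MIRACLRetrieval` task as `miracl`, but with `version_lite: true` set by default.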
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: miracl_lite
{#
  Shell command template for this task.
  - Optional `export ... &&` prefixes are rendered only when the matching setting is not none:
    API_TOKEN=$<target.api_endpoint.api_key_name>, MTEB_INTERNAL_DATASET_PATH=<extra.dataset_path>,
    RANKER_API_TOKEN=$<extra.ranker.api_key>. The two token settings are rendered after a `$`,
    i.e. they are used as environment-variable names, not as the secret values themselves.
  - `mteb` always receives the encoder name/URL, task, workdir, batch size, async limit
    (taken from `config.params.parallelism`), max retries, request timeout, --truncate and --top_k.
  - --cache_path, the raw `extra.args` string, --langs, the two prompt templates, --version_lite
    and --eval_split are appended only when their setting is not none; the three --ranker_* flags
    are gated on `extra.ranker.model_id` alone.
  The closing `-#}` strips the newline after this comment so the rendered command is unchanged.
-#}
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Defaults for the `miracl_lite` task. The command template reads these values
# through `config.params.*`, `config.params.extra.*` and `target.api_endpoint.*`,
# so the nesting below is significant.
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # rendered as --async_limit
    task: MIRACLRetrieval  # rendered as --task
    request_timeout: 300
    extra:
      # Prompt templates are passed to mteb only when set.
      query_prompt_template: null
      document_prompt_template: null
      # The --ranker_* flags are emitted only when model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null  # env-var name; exported as RANKER_API_TOKEN=$<name>
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null  # exported as MTEB_INTERNAL_DATASET_PATH when set
      cache_path: null
      args: null  # appended verbatim to the mteb command when set
      # Non-null here, so --version_lite is added to the command for this task.
      version_lite: true
      language: null  # rendered as --langs when set
  # NOTE(review): placement of the next two keys under `config` is reconstructed,
  # not shown by the command template; confirm against the harness schema.
  supported_endpoint_types:
    - embedding
  type: miracl_lite
target:
  api_endpoint: {}
mldr#
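MLDR (Multilingual Long-Document Retrieval) is a multilingual retrieval benchmark over long documents; this entry runs the `MultiLongDocRetrieval` task.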
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: mldr
{#
  Shell command template for this task.
  - Optional `export ... &&` prefixes are rendered only when the matching setting is not none:
    API_TOKEN=$<target.api_endpoint.api_key_name>, MTEB_INTERNAL_DATASET_PATH=<extra.dataset_path>,
    RANKER_API_TOKEN=$<extra.ranker.api_key>. The two token settings are rendered after a `$`,
    i.e. they are used as environment-variable names, not as the secret values themselves.
  - `mteb` always receives the encoder name/URL, task, workdir, batch size, async limit
    (taken from `config.params.parallelism`), max retries, request timeout, --truncate and --top_k.
  - --cache_path, the raw `extra.args` string, --langs, the two prompt templates, --version_lite
    and --eval_split are appended only when their setting is not none; the three --ranker_* flags
    are gated on `extra.ranker.model_id` alone.
  The closing `-#}` strips the newline after this comment so the rendered command is unchanged.
-#}
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Defaults for the `mldr` task. The command template reads these values through
# `config.params.*`, `config.params.extra.*` and `target.api_endpoint.*`, so the
# nesting below is significant.
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # rendered as --async_limit
    task: MultiLongDocRetrieval  # rendered as --task
    request_timeout: 300
    extra:
      # Prompt templates are passed to mteb only when set.
      query_prompt_template: null
      document_prompt_template: null
      # The --ranker_* flags are emitted only when model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null  # env-var name; exported as RANKER_API_TOKEN=$<name>
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null  # exported as MTEB_INTERNAL_DATASET_PATH when set
      cache_path: null
      args: null  # appended verbatim to the mteb command when set
      version_lite: null
      language: null  # rendered as --langs when set
  # NOTE(review): placement of the next two keys under `config` is reconstructed,
  # not shown by the command template; confirm against the harness schema.
  supported_endpoint_types:
    - embedding
  type: mldr
target:
  api_endpoint: {}
mlqa#
MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance.
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: mlqa
{#
  Shell command template for this task.
  - Optional `export ... &&` prefixes are rendered only when the matching setting is not none:
    API_TOKEN=$<target.api_endpoint.api_key_name>, MTEB_INTERNAL_DATASET_PATH=<extra.dataset_path>,
    RANKER_API_TOKEN=$<extra.ranker.api_key>. The two token settings are rendered after a `$`,
    i.e. they are used as environment-variable names, not as the secret values themselves.
  - `mteb` always receives the encoder name/URL, task, workdir, batch size, async limit
    (taken from `config.params.parallelism`), max retries, request timeout, --truncate and --top_k.
  - --cache_path, the raw `extra.args` string, --langs, the two prompt templates, --version_lite
    and --eval_split are appended only when their setting is not none; the three --ranker_* flags
    are gated on `extra.ranker.model_id` alone.
  The closing `-#}` strips the newline after this comment so the rendered command is unchanged.
-#}
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Defaults for the `mlqa` task. The command template reads these values through
# `config.params.*`, `config.params.extra.*` and `target.api_endpoint.*`, so the
# nesting below is significant.
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # rendered as --async_limit
    task: MLQARetrieval  # rendered as --task
    request_timeout: 300
    extra:
      # Prompt templates are passed to mteb only when set.
      query_prompt_template: null
      document_prompt_template: null
      # The --ranker_* flags are emitted only when model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null  # env-var name; exported as RANKER_API_TOKEN=$<name>
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null  # exported as MTEB_INTERNAL_DATASET_PATH when set
      cache_path: null
      args: null  # appended verbatim to the mteb command when set
      version_lite: null
      language: null  # rendered as --langs when set
  # NOTE(review): placement of the next two keys under `config` is reconstructed,
  # not shown by the command template; confirm against the harness schema.
  supported_endpoint_types:
    - embedding
  type: mlqa
target:
  api_endpoint: {}
nano_fiqa#
NanoFiQA2018 is a smaller subset of the Financial Opinion Mining and Question Answering dataset.
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: nano_fiqa
{#
  Shell command template for this task.
  - Optional `export ... &&` prefixes are rendered only when the matching setting is not none:
    API_TOKEN=$<target.api_endpoint.api_key_name>, MTEB_INTERNAL_DATASET_PATH=<extra.dataset_path>,
    RANKER_API_TOKEN=$<extra.ranker.api_key>. The two token settings are rendered after a `$`,
    i.e. they are used as environment-variable names, not as the secret values themselves.
  - `mteb` always receives the encoder name/URL, task, workdir, batch size, async limit
    (taken from `config.params.parallelism`), max retries, request timeout, --truncate and --top_k.
  - --cache_path, the raw `extra.args` string, --langs, the two prompt templates, --version_lite
    and --eval_split are appended only when their setting is not none; the three --ranker_* flags
    are gated on `extra.ranker.model_id` alone.
  The closing `-#}` strips the newline after this comment so the rendered command is unchanged.
-#}
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Defaults for the `nano_fiqa` task. The command template reads these values
# through `config.params.*`, `config.params.extra.*` and `target.api_endpoint.*`,
# so the nesting below is significant.
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # rendered as --async_limit
    task: NanoFiQA2018Retrieval  # rendered as --task
    request_timeout: 300
    extra:
      # Prompt templates are passed to mteb only when set.
      query_prompt_template: null
      document_prompt_template: null
      # The --ranker_* flags are emitted only when model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null  # env-var name; exported as RANKER_API_TOKEN=$<name>
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      # This task defaults to the `train` split, whereas the other retrieval
      # tasks on this page default to `test`.
      eval_split: train
      dataset_path: null  # exported as MTEB_INTERNAL_DATASET_PATH when set
      cache_path: null
      args: null  # appended verbatim to the mteb command when set
      version_lite: null
      language: null  # rendered as --langs when set
  # NOTE(review): placement of the next two keys under `config` is reconstructed,
  # not shown by the command template; confirm against the harness schema.
  supported_endpoint_types:
    - embedding
  type: nano_fiqa
target:
  api_endpoint: {}
nq#
Natural Questions (NQ) contains real user questions issued to Google Search, paired with answers that annotators found in Wikipedia. NQ is designed for the training and evaluation of automatic question answering systems.
Harness: mteb
Container:
nvcr.io/nvidia/eval-factory/mteb:26.01
Container Digest:
sha256:fb0ea5360bec880d4ecbfc63015d775dc3d22601e5ab17d760a992402646cbbb
Container Arch: multiarch
Task Type: nq
{#
  Shell command template for this task.
  - Optional `export ... &&` prefixes are rendered only when the matching setting is not none:
    API_TOKEN=$<target.api_endpoint.api_key_name>, MTEB_INTERNAL_DATASET_PATH=<extra.dataset_path>,
    RANKER_API_TOKEN=$<extra.ranker.api_key>. The two token settings are rendered after a `$`,
    i.e. they are used as environment-variable names, not as the secret values themselves.
  - `mteb` always receives the encoder name/URL, task, workdir, batch size, async limit
    (taken from `config.params.parallelism`), max retries, request timeout, --truncate and --top_k.
  - --cache_path, the raw `extra.args` string, --langs, the two prompt templates, --version_lite
    and --eval_split are appended only when their setting is not none; the three --ranker_* flags
    are gated on `extra.ranker.model_id` alone.
  The closing `-#}` strips the newline after this comment so the rendered command is unchanged.
-#}
{% if target.api_endpoint.api_key_name is not none %}export API_TOKEN=${{target.api_endpoint.api_key_name}} &&{% endif %} {% if config.params.extra.dataset_path is not none %} export MTEB_INTERNAL_DATASET_PATH={{config.params.extra.dataset_path}} &&{% endif %} {% if config.params.extra.ranker.api_key is not none %}export RANKER_API_TOKEN=${{config.params.extra.ranker.api_key}} &&{% endif %} mteb --encoder_name {{target.api_endpoint.model_id}} --encoder_url {{target.api_endpoint.url}} --task "{{config.params.task}}" --workdir {{config.output_dir}} --batch_size {{config.params.extra.batch_size}} --async_limit {{config.params.parallelism}} --max_retries {{config.params.max_retries}} --request_timeout {{config.params.request_timeout}} {% if config.params.extra.cache_path is not none %} --cache_path {{config.params.extra.cache_path}}{% endif %} {% if config.params.extra.args is not none %} {{config.params.extra.args}} {% endif %} {% if config.params.extra.language is not none %} --langs {{config.params.extra.language}} {% endif %} {% if config.params.extra.query_prompt_template is not none %} --query_prompt_template "{{config.params.extra.query_prompt_template}}"{% endif %} {% if config.params.extra.document_prompt_template is not none %} --document_prompt_template "{{config.params.extra.document_prompt_template}}"{% endif %} {% if config.params.extra.ranker.model_id is not none %} --ranker_name {{config.params.extra.ranker.model_id}} --ranker_url {{config.params.extra.ranker.url}} --ranker_endpoint_type {{config.params.extra.ranker.endpoint_type}}{% endif %} --truncate {{config.params.extra.truncate}} --top_k {{config.params.extra.top_k}} {% if config.params.extra.version_lite is not none%} --version_lite {{config.params.extra.version_lite}} {% endif %} {% if config.params.extra.eval_split is not none %} --eval_split {{config.params.extra.eval_split}} {% endif %}
# Defaults for the `nq` task. The command template reads these values through
# `config.params.*`, `config.params.extra.*` and `target.api_endpoint.*`, so the
# nesting below is significant.
framework_name: mteb
pkg_name: mteb
config:
  params:
    max_retries: 10
    parallelism: 20  # rendered as --async_limit
    task: NQ  # rendered as --task
    request_timeout: 300
    extra:
      # Prompt templates are passed to mteb only when set.
      query_prompt_template: null
      document_prompt_template: null
      # The --ranker_* flags are emitted only when model_id is set.
      ranker:
        model_id: null
        url: null
        api_key: null  # env-var name; exported as RANKER_API_TOKEN=$<name>
        endpoint_type: nim
      top_k: 40
      truncate: END
      batch_size: 128
      eval_split: test
      dataset_path: null  # exported as MTEB_INTERNAL_DATASET_PATH when set
      cache_path: null
      args: null  # appended verbatim to the mteb command when set
      version_lite: null
      language: null  # rendered as --langs when set
  # NOTE(review): placement of the next two keys under `config` is reconstructed,
  # not shown by the command template; confirm against the harness schema.
  supported_endpoint_types:
    - embedding
  type: nq
target:
  api_endpoint: {}