mmath#
This page contains all evaluation tasks for the mmath harness.
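| Task | Description |
|---|---|
| mmath_ar | Arabic mmath |
| mmath_en | English mmath |
| mmath_es | Spanish mmath |
| mmath_fr | French mmath |
| mmath_ja | Japanese mmath |
| mmath_ko | Korean mmath |
| mmath_pt | Portuguese mmath |
| mmath_th | Thai mmath |
| mmath_vi | Vietnamese mmath |
| mmath_zh | Chinese mmath |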
mmath_ar#
Arabic mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_ar
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: ar
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_ar
target:
api_endpoint:
stream: false
mmath_en#
English mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_en
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: en
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_en
target:
api_endpoint:
stream: false
mmath_es#
Spanish mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_es
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: es
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_es
target:
api_endpoint:
stream: false
mmath_fr#
French mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_fr
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: fr
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_fr
target:
api_endpoint:
stream: false
mmath_ja#
Japanese mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_ja
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: ja
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_ja
target:
api_endpoint:
stream: false
mmath_ko#
Korean mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_ko
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: ko
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_ko
target:
api_endpoint:
stream: false
mmath_pt#
Portuguese mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_pt
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: pt
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_pt
target:
api_endpoint:
stream: false
mmath_th#
Thai mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_th
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: th
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_th
target:
api_endpoint:
stream: false
mmath_vi#
Vietnamese mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_vi
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: vi
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_vi
target:
api_endpoint:
stream: false
mmath_zh#
Chinese mmath
Harness: mmath
Container:
nvcr.io/nvidia/eval-factory/mmath:26.01
Container Digest:
sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc
Container Arch: multiarch
Task Type: mmath_zh
{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}} && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}} --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
framework_name: mmath
pkg_name: mmath
config:
params:
max_new_tokens: 32768
max_retries: 5
parallelism: 8
temperature: 0.6
request_timeout: 3600
top_p: 0.95
extra:
language: zh
n_samples: 4
supported_endpoint_types:
- chat
type: mmath_zh
target:
api_endpoint:
stream: false