# mmath

This page contains all evaluation tasks for the mmath harness.

| Task | Description |
| --- | --- |
| mmath_ar | Arabic mmath |
| mmath_en | English mmath |
| mmath_es | Spanish mmath |
| mmath_fr | French mmath |
| mmath_ja | Japanese mmath |
| mmath_ko | Korean mmath |
| mmath_pt | Portuguese mmath |
| mmath_th | Thai mmath |
| mmath_vi | Vietnamese mmath |
| mmath_zh | Chinese mmath |

## mmath_ar

Arabic mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_ar

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_ar task (Arabic mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_ar
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: ar  # --lang
      n_samples: 4  # --n-samples

## mmath_en

English mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_en

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_en task (English mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_en
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: en  # --lang
      n_samples: 4  # --n-samples

## mmath_es

Spanish mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_es

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_es task (Spanish mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_es
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: es  # --lang
      n_samples: 4  # --n-samples

## mmath_fr

French mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_fr

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_fr task (French mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_fr
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: fr  # --lang
      n_samples: 4  # --n-samples

## mmath_ja

Japanese mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_ja

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_ja task (Japanese mmath).
# The command template above fills its CLI flags from config.params.
framework_name: mmath
pkg_name: mmath
config:
  params:
    max_new_tokens: 32768  # --max-tokens
    max_retries: 5  # --retries
    parallelism: 8  # --parallelism
    temperature: 0.6  # --temperature
    request_timeout: 3600  # --request-timeout
    top_p: 0.95  # --top-p
    extra:
      # Rendered as --lang. Must be the Japanese code: the previous value
      # `en` made this task run the same language as mmath_en.
      language: ja
      n_samples: 4  # --n-samples
  supported_endpoint_types:
  - chat
  type: mmath_ja
target:
  api_endpoint:
    stream: false

## mmath_ko

Korean mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_ko

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_ko task (Korean mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_ko
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: ko  # --lang
      n_samples: 4  # --n-samples

## mmath_pt

Portuguese mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_pt

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_pt task (Portuguese mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_pt
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: pt  # --lang
      n_samples: 4  # --n-samples

## mmath_th

Thai mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_th

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_th task (Thai mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_th
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: th  # --lang
      n_samples: 4  # --n-samples

## mmath_vi

Vietnamese mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_vi

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_vi task (Vietnamese mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_vi
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: vi  # --lang
      n_samples: 4  # --n-samples

## mmath_zh

Chinese mmath

Harness: mmath

Container:

nvcr.io/nvidia/eval-factory/mmath:26.01

Container Digest:

sha256:da033bf95efd05af58d2ab06feb2344dbca60678f3075a4bf7f53899901c5efc

Container Arch: multiarch

Task Type: mmath_zh

{% if target.api_endpoint.api_key_name is not none %}export OPENAI_API_KEY=${{target.api_endpoint.api_key_name}}  && {% endif %} mmath --model-url {{target.api_endpoint.url}} --model-name {{target.api_endpoint.model_id}}  --lang {{config.params.extra.language}} --output-dir {{config.output_dir}} --parallelism {{config.params.parallelism}} --retries {{config.params.max_retries}} --max-tokens {{config.params.max_new_tokens}} --temperature {{config.params.temperature}} --top-p {{config.params.top_p}} --request-timeout {{config.params.request_timeout}} --n-samples {{config.params.extra.n_samples}} {% if config.params.limit_samples is not none %} --limit {{config.params.limit_samples}}{% endif %}
# Default settings for the mmath_zh task (Chinese mmath).
# Trailing comments name the CLI flag that the command template above
# fills from each value.
framework_name: mmath
pkg_name: mmath
target:
  api_endpoint:
    stream: false
config:
  type: mmath_zh
  supported_endpoint_types:
  - chat
  params:
    parallelism: 8  # --parallelism
    max_retries: 5  # --retries
    request_timeout: 3600  # --request-timeout
    max_new_tokens: 32768  # --max-tokens
    temperature: 0.6  # --temperature
    top_p: 0.95  # --top-p
    extra:
      language: zh  # --lang
      n_samples: 4  # --n-samples