# tooltalk
This page contains all evaluation tasks for the tooltalk harness.
| Task | Description |
|---|---|
| tooltalk | ToolTalk task with default settings. |
## tooltalk
ToolTalk task with default settings.
Harness: tooltalk
Container: nvcr.io/nvidia/eval-factory/tooltalk:26.01
Container Digest: sha256:2c032e8274fd3a825b3c2774d33d0caddfa198fe24980dd99b8e3ae77c8aadee
Container Arch: multiarch
Task Type: tooltalk
{% if target.api_endpoint.api_key_name is not none %}API_KEY=${{target.api_endpoint.api_key_name}}{% endif %} python -m tooltalk.evaluation.evaluate_{{'openai' if 'azure' in target.api_endpoint.url or 'api.openai' in target.api_endpoint.url else 'nim'}} --dataset data/easy --database data/databases --model {{target.api_endpoint.model_id}} {% if config.params.max_new_tokens is not none %}--max_new_tokens {{config.params.max_new_tokens}}{% endif %} {% if config.params.temperature is not none %}--temperature {{config.params.temperature}}{% endif %} {% if config.params.top_p is not none %}--top_p {{config.params.top_p}}{% endif %} --api_mode all --output_dir {{config.output_dir}} --url {{target.api_endpoint.url}} {% if config.params.limit_samples is not none %}--first_n {{config.params.limit_samples}}{% endif %}
framework_name: tooltalk
pkg_name: tooltalk
config:
  params:
    extra: {}
  supported_endpoint_types:
    - chat
  type: tooltalk
target:
  api_endpoint: {}