# Launch an NVIDIA NIM container with Docker, with the NIM_OTEL_* and
# TRITON_OTEL_* environment variables below set inside the container.
#
# Environment read from the caller:
#   NGC_API_KEY - forwarded into the container. A warning is printed when it
#                 is unset or empty, but the launch is still attempted.

# Choose a container name for bookkeeping
export NIM_MODEL_NAME=nvidia/llama-3.2-nv-rerankqa-1b-v2
# Keep only the last path component of the model name. Parameter expansion is
# used instead of `export VAR=$(basename ...)` so no command-substitution exit
# status is masked by `export` and no unquoted expansion is needed.
CONTAINER_NAME=${NIM_MODEL_NAME##*/}
export CONTAINER_NAME

# Choose a NIM Image from NGC
export IMG_NAME="nvcr.io/nim/${NIM_MODEL_NAME}:1.7.0"

# Choose a path on your system to cache the downloaded models
export LOCAL_NIM_CACHE=~/.cache/nim

# Warn (on stderr) instead of silently handing the container an empty key.
if [ -z "${NGC_API_KEY:-}" ]; then
    printf '%s\n' 'warning: NGC_API_KEY is unset or empty; it will be passed to the container as an empty value' >&2
fi
# Export the key so the name-only `-e NGC_API_KEY` below can copy it from the
# environment; this keeps the secret out of the docker command line, where it
# would be visible in process listings and `set -x` traces.
export NGC_API_KEY="${NGC_API_KEY:-}"

# Only start the container when the cache directory could be created; the
# directory is bind-mounted at /opt/nim/.cache and the container runs as the
# current user id (-u), so it is created here first by that same user.
mkdir -p -- "$LOCAL_NIM_CACHE" &&
docker run --network host -it --rm \
    --runtime=nvidia \
    --gpus '"device=0"' \
    --name="$CONTAINER_NAME" \
    -e NGC_API_KEY \
    -v "$LOCAL_NIM_CACHE:/opt/nim/.cache" \
    -u "$(id -u)" \
    -e NIM_ENABLE_OTEL=1 \
    -e NIM_OTEL_TRACES_EXPORTER=otlp \
    -e NIM_OTEL_METRICS_EXPORTER=otlp \
    -e NIM_OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318" \
    -e NIM_OTEL_SERVICE_NAME=nim \
    -e TRITON_OTEL_URL="http://localhost:4318/v1/traces" \
    -e TRITON_OTEL_RATE=1 \
    -e TRITON_OTEL_COUNT=-1 \
    -e NIM_TRITON_EXTRA_ARGS="--allow-gpu-metrics=true --allow-cpu-metrics=true" \
    "$IMG_NAME"