target_info | service_version, service_name, commit |
stargate_client_requests_inflight | model |
stargate_client_requests_state | model, state |
stargate_client_requests_state_input_tokens | model, state |
stargate_client_requests_total | model, routing_key, status |
stargate_client_request_time_to_response_headers_seconds | model, routing_key |
stargate_client_request_time_to_first_output_seconds | model, routing_key |
stargate_client_request_time_to_first_token_seconds | model, routing_key |
stargate_client_request_duration_seconds | model, routing_key, status |
stargate_client_request_input_tokens_total | model, routing_key, status |
stargate_client_request_output_tokens_total | model, routing_key, status |
stargate_client_request_input_tokens | model, routing_key, status |
stargate_client_request_output_tokens | model, routing_key, status |
stargate_client_registration_stream_connected | router |
stargate_client_reverse_tunnel_connected | router |
stargate_client_model_input_tps | model |
stargate_client_model_output_tps | model |
stargate_client_model_max_input_tps | model |
stargate_client_model_max_output_tps | model |
stargate_client_model_queue_size | model |
stargate_client_model_queued_input_tokens | model |
stargate_client_model_kv_cache_capacity_tokens | model |
stargate_client_model_kv_cache_used_tokens | model |
stargate_client_model_kv_cache_free_tokens | model |
stargate_client_model_advertised_status | router, model, status |
stargate_client_retryable_responses_total | inference_server_id, reason, status |
stargate_client_nonretryable_failures_total | inference_server_id, reason |