nat.eval.utils.weave_eval#

Attributes#

logger

Classes#

WeaveEvaluationIntegration

Class to handle all Weave integration functionality.

Module Contents#

logger#

class WeaveEvaluationIntegration( eval_trace_context: nat.eval.utils.eval_trace_ctx.EvalTraceContext, )#

Class to handle all Weave integration functionality.

available = False#

client = None#

eval_logger = None#

pred_loggers#

eval_trace_context#

initialize_client()#

Initialize the Weave client if available.

_get_prediction_inputs( item: nat.eval.evaluator.evaluator_model.EvalInputItem, )#

Get the inputs for displaying in the UI. The following fields are excluded because they are too large to display in the UI:

- full_dataset_entry
- expected_trajectory
- trajectory

output_obj is excluded because it is displayed separately.

_get_weave_dataset( eval_input: nat.eval.evaluator.evaluator_model.EvalInput, )#

Get the full dataset for Weave.

initialize_logger( workflow_alias: str, eval_input: nat.eval.evaluator.evaluator_model.EvalInput, config: Any, )#

Initialize the Weave evaluation logger.

log_prediction( item: nat.eval.evaluator.evaluator_model.EvalInputItem, output: Any, )#

Log a prediction to Weave.

async log_usage_stats( item: nat.eval.evaluator.evaluator_model.EvalInputItem, usage_stats_item: nat.eval.usage_stats.UsageStatsItem, )#

Log usage stats to Weave.

async alog_score( eval_output: nat.eval.evaluator.evaluator_model.EvalOutput, evaluator_name: str, )#

Log scores for evaluation outputs.

async afinish_loggers()#

Finish all prediction loggers and wait for exports.

_log_profiler_metrics( profiler_results: nat.profiler.data_models.ProfilerResults, usage_stats: nat.eval.usage_stats.UsageStats, ) → dict[str, Any]#

Log profiler metrics to Weave.

log_summary( usage_stats: nat.eval.usage_stats.UsageStats, evaluation_results: list[tuple[str, nat.eval.evaluator.evaluator_model.EvalOutput]], profiler_results: nat.profiler.data_models.ProfilerResults, )#

Log summary statistics to Weave.