nat.eval.utils.weave_eval#

Attributes#

Classes#

WeaveEvaluationIntegration

Class to handle all Weave integration functionality.

Module Contents#

logger#
class WeaveEvaluationIntegration#

Class to handle all Weave integration functionality.

available = False#
client = None#
eval_logger = None#
pred_loggers#
initialize_client()#

Initialize the Weave client if available.

_get_prediction_inputs(
item: nat.eval.evaluator.evaluator_model.EvalInputItem,
)#

Get the inputs for displaying in the UI. The following fields are excluded because they are too large to display in the UI: full_dataset_entry, expected_trajectory, and trajectory.

output_obj is excluded because it is displayed separately.

_get_weave_dataset(
eval_input: nat.eval.evaluator.evaluator_model.EvalInput,
)#

Get the full dataset for Weave.

initialize_logger(
workflow_alias: str,
eval_input: nat.eval.evaluator.evaluator_model.EvalInput,
config: Any,
)#

Initialize the Weave evaluation logger.

log_prediction(
item: nat.eval.evaluator.evaluator_model.EvalInputItem,
output: Any,
)#

Log a prediction to Weave.

async log_usage_stats(
item: nat.eval.evaluator.evaluator_model.EvalInputItem,
usage_stats_item: nat.eval.usage_stats.UsageStatsItem,
)#

Log usage stats to Weave.

async alog_score(
eval_output: nat.eval.evaluator.evaluator_model.EvalOutput,
evaluator_name: str,
)#

Log scores for evaluation outputs.

async afinish_loggers()#

Finish all prediction loggers.

_log_profiler_metrics(
profiler_results: nat.profiler.data_models.ProfilerResults,
usage_stats: nat.eval.usage_stats.UsageStats,
) → dict[str, Any]#

Log profiler metrics to Weave.

log_summary(
usage_stats: nat.eval.usage_stats.UsageStats,
evaluation_results: list[tuple[str, nat.eval.evaluator.evaluator_model.EvalOutput]],
profiler_results: nat.profiler.data_models.ProfilerResults,
)#

Log summary statistics to Weave.