nat.plugins.weave.weave_eval_callback#

Attributes#

Classes#

WeaveEvaluationCallback

Eval callback that publishes per-item metrics and summary to Weave.

Module Contents#

logger#
class WeaveEvaluationCallback(*, project: str)#

Eval callback that publishes per-item metrics and summary to Weave.

project#
client = None#
eval_logger = None#
pred_loggers: dict[Any, Any]#
eval_call = None#
evaluation_logger_cls = None#
weave_client_context = None#
set_call_stack = None#
_is_available() → bool#
_initialize_client() → bool#
static _prediction_inputs(
item: nat.data_models.evaluator.EvalInputItem,
) → dict[str, Any]#
static _weave_dataset(
eval_input: nat.data_models.evaluator.EvalInput,
) → list[dict[str, Any]]#
on_eval_started(
*,
workflow_alias: str,
eval_input: nat.data_models.evaluator.EvalInput,
config: Any,
job_id: str | None = None,
) → None#
evaluation_context()#
on_prediction(
*,
item: nat.data_models.evaluator.EvalInputItem,
output: Any,
) → None#
async a_on_usage_stats(
*,
item: nat.data_models.evaluator.EvalInputItem,
usage_stats_item: nat.data_models.evaluate_runtime.UsageStatsItem,
) → None#
async a_on_evaluator_score(
*,
eval_output: nat.plugins.eval.data_models.evaluator_io.EvalOutput,
evaluator_name: str,
) → None#
async a_on_export_flush() → None#
static _profiler_metrics(
profiler_results: nat.data_models.evaluate_runtime.ProfilerResults,
usage_stats: nat.data_models.evaluate_runtime.UsageStats,
) → dict[str, Any]#
on_eval_summary(
*,
usage_stats: nat.data_models.evaluate_runtime.UsageStats,
evaluation_results: list[tuple[str, nat.plugins.eval.data_models.evaluator_io.EvalOutput]],
profiler_results: nat.data_models.evaluate_runtime.ProfilerResults,
) → None#