nat.plugins.langchain.eval.trajectory_evaluator#

Attributes#

Classes#

TrajectoryEvaluatorConfig

Agent trajectory evaluator configuration.

TrajectoryEvaluator

Base class for custom evaluators.

Functions#

_coerce_text(→ str)

Best-effort coercion to text for judge-chain inputs.

_extract_score_from_parser_error(→ float | None)

Best-effort extraction of numeric judge score from parser failures.

_to_agent_actions(...)

Convert intermediate steps to LangChain agent_trajectory tuples.

_message_to_text(→ str)

Convert ATIF message payloads into text for LangChain trajectory scoring.

_has_meaningful_value(→ bool)

Return whether a value is non-empty for trajectory scoring.

_dedupe_adjacent_actions(...)

Drop adjacent duplicate trajectory rows to reduce evaluator noise.

_atif_to_agent_actions(...)

Convert an ATIF trajectory into LangChain agent_trajectory tuples.

_atif_to_user_input(→ str)

Extract first user message from ATIF trajectory.

register_trajectory_evaluator(config, builder)

Module Contents#

logger#
_DEFAULT_EVENT_FILTER#
_coerce_text(value) str#

Best-effort coercion to text for judge-chain inputs.

_extract_score_from_parser_error(error_text: str) float | None#

Best-effort extraction of numeric judge score from parser failures.

class TrajectoryEvaluatorConfig#

Bases: nat.data_models.evaluator.EvaluatorLLMConfig

Agent trajectory evaluator configuration.

enable_atif_evaluator: bool = None#
_to_agent_actions(
intermediate_steps: list[nat.data_models.intermediate_step.IntermediateStep],
) list[tuple[langchain_core.agents.AgentAction, str]]#

Convert intermediate steps to LangChain agent_trajectory tuples.

_message_to_text(message) str#

Convert ATIF message payloads into text for LangChain trajectory scoring.

_has_meaningful_value(value) bool#

Return whether a value is non-empty for trajectory scoring.

_dedupe_adjacent_actions(
agent_actions: list[tuple[langchain_core.agents.AgentAction, str]],
) list[tuple[langchain_core.agents.AgentAction, str]]#

Drop adjacent duplicate trajectory rows to reduce evaluator noise.

_atif_to_agent_actions(
trajectory,
) list[tuple[langchain_core.agents.AgentAction, str]]#

Convert an ATIF trajectory into LangChain agent_trajectory tuples.

Action mapping is intentionally step-centric:

- Emit at most one LLM action for each agent step when the step message is meaningful.
- Emit one tool action for each structurally valid tool call in that step.
- Skip structurally empty artifacts and adjacent duplicate rows to reduce evaluator noise.

_atif_to_user_input(trajectory) str#

Extract first user message from ATIF trajectory.

class TrajectoryEvaluator(
llm: langchain_core.language_models.BaseChatModel,
tools: list[langchain_core.tools.BaseTool] | None = None,
max_concurrency: int = 8,
)#

Bases: nat.plugins.eval.evaluator.base_evaluator.BaseEvaluator

Base class for custom evaluators.

Warning

Experimental Feature: The Evaluation API is experimental and may change in future releases. Future versions may introduce breaking changes without notice.

Each custom evaluator must implement the evaluate_item method, which is used to evaluate a single EvalInputItem.

traj_eval_chain#
async _evaluate_with_trajectory(
item_id,
lane: str,
question: str,
generated_answer: str,
agent_trajectory: list[tuple[langchain_core.agents.AgentAction, str]],
) nat.plugins.eval.data_models.evaluator_io.EvalOutputItem#

Run trajectory scoring for one item regardless of input lane.

async evaluate_item(
item: nat.data_models.evaluator.EvalInputItem,
) nat.plugins.eval.data_models.evaluator_io.EvalOutputItem#

Each evaluator must implement this for item-level evaluation.

async evaluate_atif_item(
sample: nat.plugins.eval.evaluator.atif_evaluator.AtifEvalSample,
) nat.plugins.eval.data_models.evaluator_io.EvalOutputItem#

Evaluate a single ATIF-native sample.

async evaluate_atif_fn(
atif_samples: nat.plugins.eval.evaluator.atif_evaluator.AtifEvalSampleList,
) nat.plugins.eval.data_models.evaluator_io.EvalOutput#

ATIF-native evaluation lane for trajectory scoring.

async register_trajectory_evaluator(
config: TrajectoryEvaluatorConfig,
builder: nat.builder.builder.EvalBuilder,
)#