aiq.eval.evaluate#

Attributes#

logger

Classes#

EvaluationRun

Instantiated for each evaluation run and used to store data for that single run.

Module Contents#

logger#
class EvaluationRun(config: aiq.eval.config.EvaluationRunConfig)#

Instantiated for each evaluation run and used to store data for that single run.

Initialize an EvaluationRun with the given configuration.

config: aiq.eval.config.EvaluationRunConfig#
eval_config: aiq.data_models.evaluate.EvalConfig | None = None#
intermediate_step_adapter: aiq.eval.intermediate_step_adapter.IntermediateStepAdapter#
eval_input: aiq.eval.evaluator.evaluator_model.EvalInput | None = None#
workflow_interrupted: bool = False#
evaluation_results: list[tuple[str, aiq.eval.evaluator.evaluator_model.EvalOutput]] = []#
workflow_output_file: pathlib.Path | None = None#
evaluator_output_files: list[pathlib.Path] = []#
async run_workflow_local(
session_manager: aiq.runtime.session.AIQSessionManager,
)#

Launch the workflow on the specified questions and extract each output using the configured jsonpath.

async run_workflow(session_manager: aiq.runtime.session.AIQSessionManager)#
async profile_workflow()#

Profile the workflow run over the dataset.

write_output(
dataset_handler: aiq.eval.dataset_handler.dataset_handler.DatasetHandler,
)#
async run_single_evaluator(evaluator_name: str, evaluator: Any)#

Run a single evaluator and store its results.

async run_evaluators(evaluators: dict[str, Any])#

Run all configured evaluators asynchronously.

async run_and_evaluate() → aiq.eval.config.EvaluationRunOutput#

Run the workflow with the specified config file and evaluate the dataset.