pipeline.pipeline#

Module Contents#

Classes#

Pipeline

User-facing pipeline definition for composing processing stages.

API#

class pipeline.pipeline.Pipeline(
name: str,
description: str | None = None,
stages: list[nemo_curator.stages.base.ProcessingStage] | None = None,
config: dict[str, Any] | None = None,
)#

User-facing pipeline definition for composing processing stages.

Initialization

Initialize a new pipeline.

Args:
    name (str): Name of the pipeline.
    description (str, optional): Pipeline description. Defaults to None.
    stages (list[ProcessingStage], optional): List of stages to add to the pipeline. Defaults to None.
    config (dict[str, Any], optional): Pipeline configuration that is valid across all executors. Defaults to None.

add_stage(
stage: nemo_curator.stages.base.ProcessingStage,
) pipeline.pipeline.Pipeline#

Add a stage to the pipeline.

Args: stage (ProcessingStage): Processing stage to add

Returns: Pipeline: The pipeline itself, to allow method chaining.

build() None#

Build an execution plan from the pipeline.

Raises: ValueError: If the pipeline has no stages

describe() str#

Get a detailed description of the pipeline stages and their requirements.

run(
executor: nemo_curator.backends.base.BaseExecutor | None = None,
initial_tasks: list[nemo_curator.tasks.Task] | None = None,
) list[nemo_curator.tasks.Task] | None#

Run the pipeline.

Args:
    executor (BaseExecutor, optional): Executor to use. Defaults to None.
    initial_tasks (list[Task], optional): Initial tasks to start the pipeline with. Defaults to None.

Returns: list[Task] | None: List of tasks, or None.