backends.base#

Module Contents#

Classes#

BaseExecutor

Executor for a pipeline.

BaseStageAdapter

Adapts ProcessingStage to an execution backend, if needed.

NodeInfo

Generic node information for setup_on_node calls across backends. Simplified to match Xenna’s structure.

WorkerMetadata

Generic worker metadata for setup_on_node calls across backends. Simplified to match Xenna’s structure. The allocation field can contain backend-specific allocation information.

API#

class backends.base.BaseExecutor(config: dict[str, Any] | None = None)#

Bases: abc.ABC

Executor for a pipeline.

Initialization

abstractmethod execute(
stages: list[nemo_curator.stages.base.ProcessingStage],
initial_tasks: list[nemo_curator.tasks.Task] | None = None,
) → None#

Execute the pipeline.

class backends.base.BaseStageAdapter(stage: nemo_curator.stages.base.ProcessingStage)#

Adapts ProcessingStage to an execution backend, if needed.

Initialization

process_batch(
tasks: list[nemo_curator.tasks.Task],
) → list[nemo_curator.tasks.Task]#

Process a batch of tasks.

Args: tasks (list[Task]): List of tasks to process

Returns: list[Task]: List of processed tasks

setup(
worker_metadata: backends.base.WorkerMetadata | None = None,
) → None#

Set up the stage once per actor.

Args: worker_metadata (WorkerMetadata, optional): Information about the worker

setup_on_node(
node_info: backends.base.NodeInfo | None = None,
worker_metadata: backends.base.WorkerMetadata | None = None,
) → None#

Set up the stage on a node.

Args: node_info (NodeInfo, optional): Information about the node; worker_metadata (WorkerMetadata, optional): Information about the worker

teardown() → None#

Tear down the stage once per actor.

class backends.base.NodeInfo#

Generic node information for setup_on_node calls across backends. Simplified to match Xenna’s structure.

node_id: str = <Multiline-String>#

class backends.base.WorkerMetadata#

Generic worker metadata for setup_on_node calls across backends. Simplified to match Xenna’s structure. The allocation field can contain backend-specific allocation information.

allocation: Any#

None

worker_id: str = <Multiline-String>#