nemo_curator.pipeline.workflow

View as Markdown

Module Contents

Classes

| Name | Description |
| --- | --- |
| WorkflowBase | - |
| WorkflowRunResult | Container returned by high-level workflows to expose pipeline outputs. |

API

class nemo_curator.pipeline.workflow.WorkflowBase()
Abstract
nemo_curator.pipeline.workflow.WorkflowBase.run(
args = (),
kwargs = {}
) -> nemo_curator.pipeline.workflow.WorkflowRunResult
abstract
class nemo_curator.pipeline.workflow.WorkflowRunResult(
workflow_name: str,
pipeline_tasks: dict[str, list[nemo_curator.tasks.Task]] = dict(),
metadata: dict[str, typing.Any] = dict()
)
Dataclass

Container returned by high-level workflows to expose pipeline outputs.

metadata
dict[str, Any] = field(default_factory=dict)
pipeline_tasks
dict[str, list[Task]] = field(default_factory=dict)
workflow_name
str
nemo_curator.pipeline.workflow.WorkflowRunResult.add_metadata(
key: str,
value: typing.Any
) -> None

Add a metadata key-value pair.

nemo_curator.pipeline.workflow.WorkflowRunResult.add_pipeline_tasks(
pipeline_name: str,
tasks: list[nemo_curator.tasks.Task] | None
) -> None

Record the tasks emitted by a pipeline run. If `tasks` is None, an empty list is recorded.

nemo_curator.pipeline.workflow.WorkflowRunResult.extend_metadata(
updates: dict[str, typing.Any] | None = None
) -> None

Update the metadata dictionary in place.

nemo_curator.pipeline.workflow.WorkflowRunResult.get_metadata(
key: str
) -> typing.Any

Get a metadata value.