nemo_curator.stages.image.filters.base

View as Markdown

Module Contents

Classes

| Name | Description |
| --- | --- |
| BaseFilterStage | Base class for image filtering stages. |

Data

__all__

API

class nemo_curator.stages.image.filters.base.BaseFilterStage(
model_dir: str = None,
num_gpus_per_worker: float = 0.25,
model_inference_batch_size: int = 32,
score_threshold: float = 0.5,
verbose: bool = False,
name: str = 'image_filter'
)
Dataclass

Bases: ProcessingStage[ImageBatch, ImageBatch]

Base class for image filtering stages.

It serves as the shared parent class from which concrete image filtering stages derive.

model_dir
str = None
model_inference_batch_size
int = 32
name
str = 'image_filter'
num_gpus_per_worker
float = 0.25
score_threshold
float = 0.5
verbose
bool = False
nemo_curator.stages.image.filters.base.BaseFilterStage.__post_init__() -> None
nemo_curator.stages.image.filters.base.BaseFilterStage.inputs() -> tuple[list[str], list[str]]
nemo_curator.stages.image.filters.base.BaseFilterStage.outputs() -> tuple[list[str], list[str]]
nemo_curator.stages.image.filters.base.BaseFilterStage.process(
task: nemo_curator.tasks.ImageBatch
) -> nemo_curator.tasks.ImageBatch

Process an image batch to generate scores and filter by threshold.

Parameters:

task
ImageBatch

ImageBatch containing a list of ImageObject instances with pre-computed embeddings

Returns: ImageBatch

ImageBatch with filtered images that have scores below the threshold

nemo_curator.stages.image.filters.base.BaseFilterStage.setup(
_worker_metadata: nemo_curator.backends.base.WorkerMetadata | None = None
) -> None

Initialize the base filter stage.

nemo_curator.stages.image.filters.base.BaseFilterStage.yield_next_batch(
task: nemo_curator.tasks.ImageBatch
) -> collections.abc.Generator[list[nemo_curator.tasks.ImageObject], None, None]

Yields lists of ImageObject instances from the task, one batch of model inputs at a time.

Parameters:

task
ImageBatch

The ImageBatch to process.

nemo_curator.stages.image.filters.base.__all__ = ['BaseFilterStage']