nemo_curator.stages.audio.inference.asr_nemo

View as Markdown

Module Contents

Classes

Name — Description

InferenceAsrNemoStage — Speech recognition inference using a NeMo ASR model.

API

class nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage(
name: str = 'ASR_inference',
model_name: str = '',
cache_dir: str | None = None,
asr_model: typing.Any | None = None,
filepath_key: str = 'audio_filepath',
pred_text_key: str = 'pred_text',
resources: nemo_curator.stages.resources.Resources = field(default_factory=lambda: Resources(cpus=1.0)),
batch_size: int = 16
)
Dataclass

Bases: ProcessingStage[AudioTask, AudioTask]

Speech recognition inference using a NeMo ASR model.

Overrides process_batch for batched GPU inference.

Parameters:

model_name
str — Defaults to ''
cache_dir
str | None — Defaults to None

Optional directory for model download cache. When set, NeMo stores/loads the pretrained checkpoint here instead of the default cache location.

filepath_key
str — Defaults to 'audio_filepath'

Key in the entry dict pointing to the audio file.

pred_text_key
str — Defaults to 'pred_text'

Key where the predicted transcription is stored.

asr_model
Any | None = field(default=None, repr=False)
batch_size
int = 16
cache_dir
str | None = None
filepath_key
str = 'audio_filepath'
model_name
str = ''
name
str = 'ASR_inference'
pred_text_key
str = 'pred_text'
resources
Resources
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.__post_init__() -> None
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.check_cuda() -> torch.device
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.inputs() -> tuple[list[str], list[str]]
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.outputs() -> tuple[list[str], list[str]]
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.process(
task: nemo_curator.tasks.AudioTask
) -> nemo_curator.tasks.AudioTask
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.process_batch(
tasks: list[nemo_curator.tasks.AudioTask]
) -> list[nemo_curator.tasks.AudioTask]
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.setup(
_worker_metadata: nemo_curator.backends.base.WorkerMetadata | None = None
) -> None
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.setup_on_node(
_node_info: nemo_curator.backends.base.NodeInfo | None = None,
_worker_metadata: nemo_curator.backends.base.WorkerMetadata | None = None
) -> None
nemo_curator.stages.audio.inference.asr_nemo.InferenceAsrNemoStage.transcribe(
files: list[str]
) -> list[str]