nemo_curator.stages.video.io.clip_writer

View as Markdown

Module Contents

Classes

| Name | Description |
| --- | --- |
| ClipWriterStage | Stage that writes clips and metadata for clip transcoding. |

API

class nemo_curator.stages.video.io.clip_writer.ClipWriterStage(
output_path: str,
input_path: str,
upload_clips: bool,
dry_run: bool,
generate_embeddings: bool,
generate_previews: bool,
generate_captions: bool,
embedding_algorithm: str = 'cosmos-embed1',
caption_models: list[str] | None = None,
enhanced_caption_models: list[str] | None = None,
verbose: bool = False,
max_workers: int = 6,
log_stats: bool = False,
name: str = 'clip_writer'
)
Dataclass

Bases: ProcessingStage[VideoTask, VideoTask]

Stage that writes clips and metadata for clip transcoding.

This class processes video clips through a series of steps including embedding generation, metadata extraction, and writing to storage.

caption_models
list[str] | None = None
dry_run
bool
embedding_algorithm
str = 'cosmos-embed1'
enhanced_caption_models
list[str] | None = None
generate_captions
bool
generate_embeddings
bool
generate_previews
bool
input_path
str
log_stats
bool = False
max_workers
int = 6
name
str = 'clip_writer'
output_path
str
upload_clips
bool
verbose
bool = False
nemo_curator.stages.video.io.clip_writer.ClipWriterStage.__post_init__()
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._get_clip_chunk_uri(
input_video_path: str,
idx: int
) -> pathlib.Path
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._get_clip_uri(
video_span_uuid: uuid.UUID,
path_prefix: str,
file_type: str
) -> pathlib.Path
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._get_output_path(
output_path: str,
extra: str
) -> str
staticmethod
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._get_video_uri(
input_video_path: str
) -> pathlib.Path
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._get_window_uri(
video_span_uuid: uuid.UUID,
window: tuple[int, int],
path_prefix: str,
file_type: str
) -> pathlib.Path
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_clip_embedding(
clip: nemo_curator.tasks.video.Clip
) -> nemo_curator.tasks.video.ClipStats
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_clip_embedding_to_buffer(
clip: nemo_curator.tasks.video.Clip
) -> nemo_curator.tasks.video.ClipStats
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_clip_metadata(
clip: nemo_curator.tasks.video.Clip,
video_metadata: nemo_curator.tasks.video.VideoMetadata,
filtered: bool = False
) -> nemo_curator.tasks.video.ClipStats
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_clip_mp4(
clip: nemo_curator.tasks.video.Clip,
filtered: bool = False
) -> nemo_curator.tasks.video.ClipStats
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_clip_window_webp(
clip: nemo_curator.tasks.video.Clip
) -> nemo_curator.tasks.video.ClipStats
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_data(
buffer: bytes,
dest: pathlib.Path,
desc: str,
source_video: str
) -> None
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_json_data(
data: dict,
dest: pathlib.Path,
desc: str,
source_video: str
) -> None
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_video_embeddings_to_parquet(
video: nemo_curator.tasks.video.Video
) -> None
nemo_curator.stages.video.io.clip_writer.ClipWriterStage._write_video_metadata(
video: nemo_curator.tasks.video.Video
) -> None
nemo_curator.stages.video.io.clip_writer.ClipWriterStage.calculate_sha256(
buffer: bytes
) -> str
staticmethod

Get sha256 of byte array.

nemo_curator.stages.video.io.clip_writer.ClipWriterStage.get_output_path_ce1_embd(
output_path: str
) -> str
staticmethod

Get path to store generated clip embeddings of Cosmos-Embed1.

nemo_curator.stages.video.io.clip_writer.ClipWriterStage.get_output_path_ce1_embd_parquet(
output_path: str
) -> str
staticmethod

Get path to store generated clip embeddings of Cosmos-Embed1 in a parquet file.

nemo_curator.stages.video.io.clip_writer.ClipWriterStage.get_output_path_clips(
output_path: str,
filtered: bool = False
) -> str
staticmethod

Get path to store generated clips.

nemo_curator.stages.video.io.clip_writer.ClipWriterStage.get_output_path_metas(
output_path: str,
version: str
) -> str
staticmethod

Get path to store clip metadata.

nemo_curator.stages.video.io.clip_writer.ClipWriterStage.get_output_path_previews(
output_path: str
) -> str
staticmethod

Get path to store generated previews.

nemo_curator.stages.video.io.clip_writer.ClipWriterStage.get_output_path_processed_clip_chunks(
output_path: str
) -> str
staticmethod

Get path to store processed clip chunks.

nemo_curator.stages.video.io.clip_writer.ClipWriterStage.get_output_path_processed_videos(
output_path: str
) -> str
staticmethod

Get path to store processed videos.

nemo_curator.stages.video.io.clip_writer.ClipWriterStage.inputs() -> tuple[list[str], list[str]]
nemo_curator.stages.video.io.clip_writer.ClipWriterStage.outputs() -> tuple[list[str], list[str]]
nemo_curator.stages.video.io.clip_writer.ClipWriterStage.process(
task: nemo_curator.tasks.video.VideoTask
) -> nemo_curator.tasks.video.VideoTask
nemo_curator.stages.video.io.clip_writer.ClipWriterStage.setup(
worker_metadata: nemo_curator.backends.base.WorkerMetadata | None = None
) -> None