tasks.video#

Module Contents#

Classes#

Clip

Container for video clip data including metadata, frames, and processing results.

ClipStats

Statistics for video clips including filtering, transcoding, and captioning results.

Video

Container for video content including metadata, frames, and processing results.

VideoMetadata

Metadata for video content including dimensions, timing, and codec information.

VideoTask

Task for processing a single video.

API#

class tasks.video.Clip#

Container for video clip data including metadata, frames, and processing results.

This class stores information about a video segment, including its source, timing, extracted frames, motion data, aesthetic scores, and generated captions.

aesthetic_score: float | None#

None

buffer: bytes | None#

None

cosmos_embed1_embedding: numpy.typing.NDArray[numpy.float32] | None#

None

cosmos_embed1_frames: numpy.typing.NDArray[numpy.float32] | None#

None

cosmos_embed1_text_match: tuple[str, float] | None#

None

decoded_motion_data: None#

None

property duration: float#

Calculate the duration of the clip.

Returns: Duration of the clip in seconds.

egomotion: dict[str, bytes]#

‘field(…)’

errors: dict[str, str]#

‘field(…)’

extract_metadata() dict[str, Any] | None#

Extract metadata from the clip’s buffer.

Returns: A dictionary containing the extracted metadata (width, height, framerate, num_frames, video_codec, num_bytes) if buffer exists, None otherwise.

Raises: Exception: Any exception from extract_video_metadata is propagated.

extracted_frames: dict[str, numpy.typing.NDArray[numpy.uint8]]#

‘field(…)’

get_major_size() int#

Calculate total memory size of the clip.

Returns: Total size in bytes.

intern_video_2_embedding: numpy.typing.NDArray[numpy.float32] | None#

None

intern_video_2_frames: numpy.typing.NDArray[numpy.float32] | None#

None

intern_video_2_text_match: tuple[str, float] | None#

None

motion_score_global_mean: float | None#

None

motion_score_per_patch_min_256: float | None#

None

source_video: str#

None

span: tuple[float, float]#

None

uuid: uuid.UUID#

None

windows: list[tasks.video._Window]#

‘field(…)’

class tasks.video.ClipStats#

Statistics for video clips including filtering, transcoding, and captioning results.

This class accumulates statistics about the number of clips processed through different stages of the video processing pipeline, including motion filtering, aesthetic filtering, and captioning.

combine(other: tasks.video.ClipStats) None#

Combine two ClipStats objects.

Args: other: ClipStats object to combine with.

max_clip_duration: float#

0.0

num_filtered_by_aesthetic: int#

0

num_filtered_by_motion: int#

0

num_passed: int#

0

num_transcoded: int#

0

num_with_caption: int#

0

num_with_embeddings: int#

0

num_with_webp: int#

0

total_clip_duration: float#

0.0

class tasks.video.Video#

Container for video content including metadata, frames, and processing results.

This class stores information about a whole input video, including its input path, source bytes, metadata, frame array, its clips and filtered clips, aggregate clip statistics, clip-chunk bookkeeping, and any recorded errors.

clip_chunk_index: int#

0

clip_stats: tasks.video.ClipStats#

‘field(…)’

clips: list[tasks.video.Clip]#

‘field(…)’

errors: dict[str, str]#

‘field(…)’

filtered_clips: list[tasks.video.Clip]#

‘field(…)’

property fraction: float#

Calculate the fraction of processed clips.

Returns: Fraction of processed clips.

frame_array: numpy.typing.NDArray[numpy.uint8] | None#

None

get_major_size() int#

Calculate total memory size of the video.

Returns: Total size in bytes.

has_metadata() bool#

Check if all metadata fields are present.

Returns: True if all metadata fields are present, False otherwise.

property input_path: str#

Get the input path of the video.

Returns: Input path of the video.

input_video: pathlib.Path#

None

is_10_bit_color() bool | None#

Heuristic function to determine if the input video has 10-bit color.

metadata: tasks.video.VideoMetadata#

‘field(…)’

num_clip_chunks: int#

0

num_total_clips: int#

0

populate_metadata() None#

Extract and assign video metadata from source_bytes.

This method extracts metadata from the video data in source_bytes and assigns it to self.metadata.

Raises: ValueError – if source_bytes is None; Exception – any exception raised by extract_video_metadata is propagated.

source_bytes: bytes | None#

None

property weight: float#

Calculate the weight of the video.

Returns: Weight of the video.

class tasks.video.VideoMetadata#

Metadata for video content including dimensions, timing, and codec information.

This class stores essential video properties such as resolution, frame rate, duration, and encoding details.

audio_codec: str | None#

None

bit_rate_k: int | None#

None

duration: float | None#

None

framerate: float | None#

None

height: int | None#

None

num_frames: int | None#

None

pixel_format: str | None#

None

size: int | None#

None

video_codec: str | None#

None

width: int | None#

None

class tasks.video.VideoTask#

Bases: tasks.tasks.Task[tasks.video.Video]

Task for processing a single video.

data: tasks.video.Video#

‘field(…)’

property num_items: int#

Get the number of items in this task.

validate() bool#

Validate the task data.