nv_ingest_api.util.dataloader package#

Submodules#

nv_ingest_api.util.dataloader.dataloader module#

class nv_ingest_api.util.dataloader.dataloader.DataLoader( path: str, output_dir: str, split_type: SplitType = SplitType.SIZE, split_interval: int = 450, interface: LoaderInterface | None = None, size: int = 2, video_audio_separate: bool = False, audio_only: bool = False, )[source]#

Bases: object

DataLoader is a class that is used to load data from a list of paths and push it to a queue. paths: list[str], list of paths to process; size: int, size of the queue.

get_metadata()[source]#: Get the metadata for a path. path: str, path to get the metadata for; if None, get the metadata for all paths.

stop()[source]#: Reset the iterator by stopping the thread and clearing the queue.

class nv_ingest_api.util.dataloader.dataloader.LoaderInterface[source]#

Bases: ABC

abstract split( input_path: str, output_dir: str, split_interval: int = 0, )[source]#

class nv_ingest_api.util.dataloader.dataloader.MediaInterface[source]#

Bases: LoaderInterface

find_num_splits( file_size: int, sample_rate: float, duration: float, split_interval: int, split_type: SplitType, )[source]#: Find the number of splits for a media file based on the split type and interval. file_size: int, size of the media file in bytes; sample_rate: float, sample rate of the media file in samples per second; duration: float, duration of the media file in seconds; split_interval: int, size of the chunk to split the media file into, depending on the split type; split_type: SplitType, type of split to perform, either size, time, or frame.

get_audio_from_video( input_path: str, output_file: str, cache_path: str | None = None, )[source]#

probe_media( path_file: Path, split_interval: int, split_type: SplitType, file_handle=None, )[source]#

split( input_path: str, output_dir: str, split_interval: int = 0, split_type: SplitType = SplitType.SIZE, cache_path: str | None = None, video_audio_separate: bool = False, audio_only: bool = False, )[source]#: Split a media file into smaller chunks of split_interval size. If video_audio_separate is True and the file is a video, the audio will be extracted from the video and saved to separate files. Data can be returned as a tuple of (video_files, audio_files) or just files (i.e. audio files). input_path: str, path to the media file; output_dir: str, path to the output directory; split_interval: the size of the chunk to split the media file into, depending on the split type; split_type: SplitType, type of split to perform, either size, time, or frame; video_audio_separate: bool, whether to separate the video and audio files; audio_only: bool, whether to only return the audio files.

class nv_ingest_api.util.dataloader.dataloader.SplitType(value)[source]#

Bases: Enum

An enumeration.

FRAME = 'frame'#

SIZE = 'size'#

TIME = 'time'#

nv_ingest_api.util.dataloader.dataloader.load_data( queue: Queue, paths: list[str], thread_stop: Event, )[source]#

nv_ingest_api.util.dataloader.dataloader.strip_audio_from_video_files( input_path: str, output_dir: str, cache_path: str | None = None, file_type='.mp4', )[source]#: Strip the audio from a series of video files and return the paths to the new files. input_path: str, path to the video file; output_dir: str, path to the output directory; cache_path: str, path to the cache directory.

Module contents#

class nv_ingest_api.util.dataloader.DataLoader( path: str, output_dir: str, split_type: SplitType = SplitType.SIZE, split_interval: int = 450, interface: LoaderInterface | None = None, size: int = 2, video_audio_separate: bool = False, audio_only: bool = False, )[source]#

Bases: object

DataLoader is a class that is used to load data from a list of paths and push it to a queue. paths: list[str], list of paths to process; size: int, size of the queue.

get_metadata()[source]#: Get the metadata for a path. path: str, path to get the metadata for; if None, get the metadata for all paths.

stop()[source]#: Reset the iterator by stopping the thread and clearing the queue.

class nv_ingest_api.util.dataloader.MediaInterface[source]#

Bases: LoaderInterface

find_num_splits( file_size: int, sample_rate: float, duration: float, split_interval: int, split_type: SplitType, )[source]#: Find the number of splits for a media file based on the split type and interval. file_size: int, size of the media file in bytes; sample_rate: float, sample rate of the media file in samples per second; duration: float, duration of the media file in seconds; split_interval: int, size of the chunk to split the media file into, depending on the split type; split_type: SplitType, type of split to perform, either size, time, or frame.

get_audio_from_video( input_path: str, output_file: str, cache_path: str | None = None, )[source]#

probe_media( path_file: Path, split_interval: int, split_type: SplitType, file_handle=None, )[source]#

split( input_path: str, output_dir: str, split_interval: int = 0, split_type: SplitType = SplitType.SIZE, cache_path: str | None = None, video_audio_separate: bool = False, audio_only: bool = False, )[source]#: Split a media file into smaller chunks of split_interval size. If video_audio_separate is True and the file is a video, the audio will be extracted from the video and saved to separate files. Data can be returned as a tuple of (video_files, audio_files) or just files (i.e. audio files). input_path: str, path to the media file; output_dir: str, path to the output directory; split_interval: the size of the chunk to split the media file into, depending on the split type; split_type: SplitType, type of split to perform, either size, time, or frame; video_audio_separate: bool, whether to separate the video and audio files; audio_only: bool, whether to only return the audio files.