nemo_automodel.components.datasets.diffusion.meta_files_dataset

Module Contents

Classes

MetaFilesDataset

PyTorch dataset for WAN2.1 .meta files.

Functions

collate_fn
build_node_parallel_sampler
build_dataloader
create_dataloader

Data

logger

API

nemo_automodel.components.datasets.diffusion.meta_files_dataset.logger

Value: getLogger(...)

class nemo_automodel.components.datasets.diffusion.meta_files_dataset.MetaFilesDataset(
    meta_folder: str,
    transform_text: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
    transform_video: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
    filter_fn: Optional[Callable[[Dict], bool]] = None,
    device: str = 'cpu',
    max_files: Optional[int] = None,
)

Bases: torch.utils.data.Dataset

PyTorch dataset for WAN2.1 .meta files.

Initialization

_log_dataset_stats() -> None
__len__() -> int
__getitem__(index: int) -> Dict[str, torch.Tensor]

nemo_automodel.components.datasets.diffusion.meta_files_dataset.collate_fn(
    batch: List[Dict[str, torch.Tensor]],
) -> Dict[str, torch.Tensor]

nemo_automodel.components.datasets.diffusion.meta_files_dataset.build_node_parallel_sampler(
    dataset: torch.utils.data.Dataset,
    dp_rank: int,
    dp_world_size: int,
    shuffle: bool = True,
) -> Optional[torch.utils.data.DistributedSampler]

nemo_automodel.components.datasets.diffusion.meta_files_dataset.build_dataloader(
    *,
    meta_folder: str,
    batch_size: int,
    dp_rank: int,
    dp_world_size: int,
    shuffle: bool = True,
    num_workers: int = 2,
    device: str = 'cpu',
    transform_text: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
    transform_video: Optional[Callable[[torch.Tensor], torch.Tensor]] = None,
    filter_fn: Optional[Callable[[Dict], bool]] = None,
    max_files: Optional[int] = None,
) -> Tuple[torch.utils.data.DataLoader, Optional[torch.utils.data.DistributedSampler]]

nemo_automodel.components.datasets.diffusion.meta_files_dataset.create_dataloader(
    meta_folder: str,
    batch_size: int,
    num_nodes: int,
) -> Tuple[torch.utils.data.DataLoader, Optional[torch.utils.data.DistributedSampler]]