datasets.image_text_pair_dataset

Module Contents

Classes

API

class datasets.image_text_pair_dataset.ImageTextPairDataset(
    path: str,
    metadata: dask.dataframe.DataFrame,
    tar_files: list[str],
    id_col: str,
)

Initialization

classmethod from_webdataset(
    path: str,
    id_col: str,
) -> datasets.image_text_pair_dataset.ImageTextPairDataset
save_metadata(
    path: str | None = None,
    columns: list[str] | None = None,
) -> None
to_webdataset(
    path: str,
    filter_column: str,
    samples_per_shard: int = 10000,
    max_shards: int = 5,
    old_id_col: str | None = None,
) -> None