nemo_curator.utils.split_large_files

View as Markdown

Module Contents

Functions

API

nemo_curator.utils.split_large_files._split_table(
    table: pyarrow.Table,
    target_size: int
) -> list[pyarrow.Table]
nemo_curator.utils.split_large_files._write_table_to_file(
    table: pyarrow.Table,
    outdir: str,
    output_prefix: str,
    ext: str,
    file_idx: int
) -> int
nemo_curator.utils.split_large_files.main(
    args: argparse.ArgumentParser | None = None
) -> None
nemo_curator.utils.split_large_files.parse_args(
    args: argparse.ArgumentParser | None = None
) -> argparse.Namespace
nemo_curator.utils.split_large_files.split_parquet_file_by_size(
    input_file: str,
    outdir: str,
    target_size_mb: int
) -> None