utils.fuzzy_dedup_utils.shuffle_utils
#
Module Contents#
Functions#
Data#
API#
- utils.fuzzy_dedup_utils.shuffle_utils.USE_EXCOMMS#
None
- utils.fuzzy_dedup_utils.shuffle_utils.dask_cuda_version#
‘Version(…)’
- utils.fuzzy_dedup_utils.shuffle_utils.get_shuffle_part_ids_df(
- agg_df: cudf.DataFrame,
- partition_on: str,
- output_col: str,
- size_col: str,
- num_workers: int = 0,
- )
- utils.fuzzy_dedup_utils.shuffle_utils.rearange_by_column_direct(
- df: cudf.DataFrame,
- col: str,
- npartitions: int,
- ignore_index: bool,
- excomms_default: bool = USE_EXCOMMS,
- )
- utils.fuzzy_dedup_utils.shuffle_utils.write_partitioned_file(
- df: cudf.DataFrame,
- output_path: str,
- partition_on: str,
- batch_id: int,