utils.fuzzy_dedup_utils.shuffle_utils#

Module Contents#

Functions#

Data#

API#

utils.fuzzy_dedup_utils.shuffle_utils.USE_EXCOMMS#

None

utils.fuzzy_dedup_utils.shuffle_utils.dask_cuda_version#

‘Version(…)’

utils.fuzzy_dedup_utils.shuffle_utils.get_shuffle_part_ids_df(
    agg_df: cudf.DataFrame,
    partition_on: str,
    output_col: str,
    size_col: str,
    num_workers: int = 0,
) -> cudf.DataFrame#
utils.fuzzy_dedup_utils.shuffle_utils.rearange_by_column_direct(
    df: cudf.DataFrame,
    col: str,
    npartitions: int,
    ignore_index: bool,
    excomms_default: bool = USE_EXCOMMS,
) -> cudf.DataFrame#
utils.fuzzy_dedup_utils.shuffle_utils.write_partitioned_file(
    df: cudf.DataFrame,
    output_path: str,
    partition_on: str,
    batch_id: int,
) -> cudf.Series#