utils.fuzzy_dedup_utils.merge_utils
Module Contents
Functions
API
- utils.fuzzy_dedup_utils.merge_utils.apply_bk_mapping(
- part: utils.fuzzy_dedup_utils.merge_utils.cudf | pandas.DataFrame,
- bk_map: utils.fuzzy_dedup_utils.merge_utils.cudf | pandas.DataFrame,
- )
- utils.fuzzy_dedup_utils.merge_utils.blockwise_merge(
- left: dask.dataframe.DataFrame,
- right: dask.dataframe.DataFrame,
- on: str,
- how: str = 'inner',
- )
- utils.fuzzy_dedup_utils.merge_utils.extract_partitioning_index(
- left_df: dask.dataframe.DataFrame,
- merge_on: str,
- bk_mapping: dask.dataframe.DataFrame,
- parts_per_bucket_batch: int,
- total_bucket_partitions: int,
- )
- utils.fuzzy_dedup_utils.merge_utils.filter_text_rows_by_bucket_batch(
- left_df: dask.dataframe.DataFrame,
- global_partitioning_index: dask.dataframe.Series,
- bucket_part_offset: int,
- bucket_part_end_offset: int,
- total_bucket_partitions: int,
- )
- utils.fuzzy_dedup_utils.merge_utils.merge_left_to_shuffled_right(
- subset_text_df: dask.dataframe.DataFrame,
- subset_bucket_df: dask.dataframe.DataFrame,
- merge_on: str,