utils.duplicates_removal

Module Contents

Functions

API

utils.duplicates_removal.deduplicate_groups(
duplicates: dask.dataframe.DataFrame,
group_field: str | None,
perform_shuffle: bool,
) -> dask.dataframe.DataFrame
utils.duplicates_removal.left_anti_join(
left: dask.dataframe.DataFrame,
right: dask.dataframe.DataFrame,
left_on: str | list[str],
right_on: str | list[str],
) -> dask.dataframe.DataFrame
utils.duplicates_removal.remove_duplicates(
left: dask.dataframe.DataFrame,
duplicates: dask.dataframe.DataFrame,
id_field: str,
group_field: str | None = None,
perform_shuffle: bool = False,
) -> dask.dataframe.DataFrame