modules.fuzzy_dedup.connectedcomponents
#
Module Contents
Classes
API
- class modules.fuzzy_dedup.connectedcomponents.ConnectedComponents(
- cache_dir: str,
- jaccard_pairs_path: str,
- id_column: str = 'id',
- jaccard_threshold: float = 0.8,
- logger: logging.LoggerAdapter | str = './',
- profile_dir: str | None = None,
- )
Initialization
- cc_workflow(output_path: str) -> None
- static thresholding(
- df: cudf.DataFrame,
- threshold: float,
- column_to_threshold: str,