modules.joiner#

Module Contents#

Classes#

API#

class modules.joiner.DocumentJoiner(
    separator: str,
    text_field: str = 'text',
    segment_id_field: str = 'segment_id',
    document_id_field: str = 'id',
    drop_segment_id_field: bool = True,
    max_length: int | None = None,
    length_field: str | None = None,
)

Bases: nemo_curator.modules.base.BaseModule

Initialization

call(
    dataset: nemo_curator.datasets.DocumentDataset,
) -> nemo_curator.datasets.DocumentDataset