stages.text.io.writer.jsonl

Module Contents#

Classes#

JsonlWriter

Writer that writes a DocumentBatch to a JSONL file.

API#

class stages.text.io.writer.jsonl.JsonlWriter

Bases: stages.text.io.writer.base.BaseWriter

Writer that writes a DocumentBatch to a JSONL file.

file_extension: str

'jsonl'

write_data(task: nemo_curator.tasks.DocumentBatch, file_path: str) -> None

Write the data to a JSONL file using pandas `DataFrame.to_json`.

write_kwargs: dict[str, Any]

'field(…)'