nemo_curator.stages.text.io.writer.jsonl

View as Markdown

Module Contents

Classes

Name | Description
JsonlWriter | Writer that writes a DocumentBatch to a JSONL file.

API

class nemo_curator.stages.text.io.writer.jsonl.JsonlWriter(
path: str,
file_extension: str = 'jsonl',
write_kwargs: dict[str, typing.Any] = field(default_factory=dict),
fields: list[str] | None = None,
name: str = 'jsonl_writer',
mode: typing.Literal['ignore', 'overwrite', 'append', 'error'] = 'ignore',
append_mode_implemented: bool = False
)
Dataclass

Bases: BaseWriter

Writer that writes a DocumentBatch to a JSONL file.

file_extension: str = 'jsonl'
name: str = 'jsonl_writer'
write_kwargs: dict[str, Any] = field(default_factory=dict)

nemo_curator.stages.text.io.writer.jsonl.JsonlWriter.write_data(
task: nemo_curator.tasks.DocumentBatch,
file_path: str
) -> None

Write the task's data to a JSONL file using pandas `DataFrame.to_json`.