nemo_curator.stages.text.modifiers.unicode.unicode_reformatter

View as Markdown | Open in Claude

Module Contents

Classes

Name | Description
UnicodeReformatter | -

API

class nemo_curator.stages.text.modifiers.unicode.unicode_reformatter.UnicodeReformatter(
config: ftfy.TextFixerConfig | None = None,
unescape_html: str | bool = 'auto',
remove_terminal_escapes: bool = True,
fix_encoding: bool = True,
restore_byte_a0: bool = True,
replace_lossy_sequences: bool = True,
decode_inconsistent_utf8: bool = True,
fix_c1_controls: bool = True,
fix_latin_ligatures: bool = False,
fix_character_width: bool = False,
uncurl_quotes: bool = False,
fix_line_breaks: bool = False,
fix_surrogates: bool = True,
remove_control_chars: bool = True,
normalization: typing.Literal['NFC', 'NFD', 'NFKC', 'NFKD'] | None = None,
max_decode_length: int = 1000000,
explain: bool = True
)

Bases: DocumentModifier

config

nemo_curator.stages.text.modifiers.unicode.unicode_reformatter.UnicodeReformatter.modify_document(
text: str
) -> str