API Reference > Full Library Reference > Nemo Curator > Nemo Curator > Stages > Text > Utils

nemo_curator.stages.text.utils.constants

Ask a question | Copy page | View as Markdown | More actions

Module Contents

Data

- bullet_list
- common_english_words
- ellipsis_marks
- end_marks
- policy_substrings
- regex_alpha
- regex_alphanum
- regex_digit
- regex_hash
- regex_paren
- regex_url
- white_space_list

API

nemo_curator.stages.text.utils.constants.bullet_list = {'•', '‣', '⁃', '⁌', '⁍', '∙', '○', '●', '◘', '◦', '⦾', '⦿'}

nemo_curator.stages.text.utils.constants.common_english_words = {'the', 'be', 'to', 'of', 'and', 'that', 'have', 'with'}

nemo_curator.stages.text.utils.constants.ellipsis_marks = {'...', '[...]', '…', '(...)', '[…]', '-»', 'read more..', 'read more'}

nemo_curator.stages.text.utils.constants.end_marks = ('.', '?', '!', '"', "'")

nemo_curator.stages.text.utils.constants.policy_substrings = ['terms of use', 'privacy policy', 'cookie policy', 'uses cookies', 'privacy ove...

nemo_curator.stages.text.utils.constants.regex_alpha = regex.compile('[[:alpha:]]')

nemo_curator.stages.text.utils.constants.regex_alphanum = re.compile('[a-zA-Z0-9\n?!,.]')

nemo_curator.stages.text.utils.constants.regex_digit = regex.compile('[[:digit:]]')

nemo_curator.stages.text.utils.constants.regex_hash = re.compile('#+')

nemo_curator.stages.text.utils.constants.regex_paren = re.compile('{|}|⟨|⟩|\\[|\\]|\\(|\\)')

nemo_curator.stages.text.utils.constants.regex_url = re.compile('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0...

nemo_curator.stages.text.utils.constants.white_space_list = ['\t', '\n', '\r', '\x08', ' ']