synthetic.nemotron_cc#
Module Contents#
Classes#
API#
- class synthetic.nemotron_cc.NemotronCCDiverseQAPostprocessor(
- tokenizer: transformers.AutoTokenizer | None = None,
- text_field: str = 'text',
- response_field: str = 'response',
- max_num_pairs: int = 1,
- prefix: str = 'Here are the questions and answers based on the provided text:',
)#
Bases:
nemo_curator.BaseModule
Initialization
- call(
- dataset: nemo_curator.datasets.DocumentDataset,
- class synthetic.nemotron_cc.NemotronCCGenerator(llm_client: nemo_curator.services.LLMClient)#
Initialization
- distill(
- document: str,
- model: str,
- prompt_template: str = DISTILL_PROMPT_TEMPLATE,
- system_prompt: str = NEMOTRON_CC_DISTILL_SYSTEM_PROMPT,
- prompt_kwargs: dict | None = None,
- model_kwargs: dict | None = None,
- extract_knowledge(
- document: str,
- model: str,
- prompt_template: str = EXTRACT_KNOWLEDGE_PROMPT_TEMPLATE,
- system_prompt: str = NEMOTRON_CC_SYSTEM_PROMPT,
- prompt_kwargs: dict | None = None,
- model_kwargs: dict | None = None,
- generate_diverse_qa(
- document: str,
- model: str,
- prompt_template: str = DIVERSE_QA_PROMPT_TEMPLATE,
- system_prompt: str = NEMOTRON_CC_SYSTEM_PROMPT,
- prompt_kwargs: dict | None = None,
- model_kwargs: dict | None = None,
- generate_knowledge_list(
- document: str,
- model: str,
- prompt_template: str = KNOWLEDGE_LIST_PROMPT_TEMPLATE,
- system_prompt: str = NEMOTRON_CC_SYSTEM_PROMPT,
- prompt_kwargs: dict | None = None,
- model_kwargs: dict | None = None,
- rewrite_to_wikipedia_style(
- document: str,
- model: str,
- prompt_template: str = WIKIPEDIA_REPHRASING_PROMPT_TEMPLATE,
- system_prompt: str = NEMOTRON_CC_SYSTEM_PROMPT,
- prompt_kwargs: dict | None = None,
- model_kwargs: dict | None = None,