synthetic.nemotron

Module Contents

Classes

API

class synthetic.nemotron.NemotronFormatter

Bases: nemo_curator.services.conversation_formatter.ConversationFormatter

PROMPT_PREFIX = <Multiline-String>
static format_conversation(conv: list[dict]) -> str
class synthetic.nemotron.NemotronGenerator(
llm_client: nemo_curator.services.model_client.LLMClient,
)

Initialization

classify_math_entity(
entity: str,
model: str,
prompt_template: str = DEFAULT_MATH_CLASSIFICATION_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
classify_python_entity(
entity: str,
model: str,
prompt_template: str = DEFAULT_PYTHON_CLASSIFICATION_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
convert_response_to_yaml_list(
llm_response: str,
model: str,
prompt_template: str = DEFAULT_YAML_CONVERSION_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_closed_qa_instructions(
document: str,
n_openlines: str | int,
model: str,
prompt_template: str = DEFAULT_CLOSED_QA_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_dialogue(
openline: str,
user_model: str,
assistant_model: str,
n_user_turns: int = 3,
prompt_template: str = DIALOGUE_NORMAL_USER_TURN_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
user_model_kwargs: dict | None = None,
assistant_model_kwargs: dict | None = None,
) -> list[dict]
generate_macro_topics(
n_macro_topics: int | str,
model: str,
prompt_template: str = DEFAULT_MACRO_TOPICS_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_math_macro_topics(
n_macro_topics: int | str,
school_level: str,
model: str,
prompt_template: str = DEFAULT_MATH_MACRO_TOPICS_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_math_problem(
topic: str,
n_openlines: str | int,
model: str,
prompt_template: str = MATH_PROBLEM_GENERAL_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_math_subtopics(
macro_topic: str,
n_subtopics: int | str,
model: str,
prompt_template: str = DEFAULT_MATH_SUBTOPICS_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_open_qa_from_topic(
topic: str,
n_openlines: str | int,
model: str,
prompt_template: str = DEFAULT_OPEN_QA_FROM_TOPICS_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_python_macro_topics(
n_macro_topics: int | str,
model: str,
prompt_template: str = DEFAULT_PYTHON_MACRO_TOPICS_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_python_problem(
topic: str,
n_openlines: str | int,
model: str,
language: str = 'Python',
prompt_template: str = PYTHON_PROBLEM_BEGINNER_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_python_subtopics(
macro_topic: str,
n_subtopics: int | str,
model: str,
prompt_template: str = DEFAULT_PYTHON_SUBTOPICS_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_subtopics(
macro_topic: str,
n_subtopics: int | str,
model: str,
prompt_template: str = DEFAULT_SUBTOPICS_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
generate_two_turn_prompt(
openline: str,
user_model: str,
assistant_model: str,
prompt_template: str = DIALOGUE_NORMAL_USER_TURN_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
user_model_kwargs: dict | None = None,
assistant_model_kwargs: dict | None = None,
) -> list[dict]
generate_writing_tasks(
topic: str,
text_material_type: str,
n_openlines: str | int,
model: str,
prompt_template: str = DEFAULT_WRITING_TASK_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
revise_open_qa(
openline: str,
n_revisions: str | int,
model: str,
prompt_template: str = DEFAULT_REVISE_OPEN_QA_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
revise_writing_tasks(
openline: str,
n_revisions: str | int,
model: str,
prompt_template: str = DEFAULT_REVISE_WRITING_TASK_PROMPT_TEMPLATE,
prompt_kwargs: dict | None = None,
model_kwargs: dict | None = None,
) -> list[str]
run_closed_qa_pipeline(
documents: list[str],
n_openlines: str | int,
model: str,
closed_qa_prompt_template: str = DEFAULT_CLOSED_QA_PROMPT_TEMPLATE,
yaml_conversion_prompt_template: str = DEFAULT_YAML_CONVERSION_PROMPT_TEMPLATE,
base_model_kwargs: dict | None = None,
conversion_model_kwargs: dict | None = None,
ignore_conversion_failure: bool = False,
) -> list[tuple[int, str]]
run_math_pipeline(
n_macro_topics: str | int,
school_level: str,
n_subtopics: str | int,
n_openlines: str | int,
model: str,
macro_topic_prompt_template: str = DEFAULT_MATH_MACRO_TOPICS_PROMPT_TEMPLATE,
subtopic_prompt_template: str = DEFAULT_MATH_SUBTOPICS_PROMPT_TEMPLATE,
math_problem_prompt_template: str = MATH_PROBLEM_GENERAL_PROMPT_TEMPLATE,
yaml_conversion_prompt_template: str = DEFAULT_YAML_CONVERSION_PROMPT_TEMPLATE,
base_model_kwargs: dict | None = None,
conversion_model_kwargs: dict | None = None,
additional_macro_topics: list[str] | None = None,
additional_subtopics: list[str] | None = None,
ignore_conversion_failure: bool = False,
combine_topics: bool = True,
) -> list[str]
run_open_qa_pipeline(
n_macro_topics: str | int,
n_subtopics: str | int,
n_openlines: str | int,
n_revisions: str | int,
model: str,
macro_topic_prompt_template: str = DEFAULT_MACRO_TOPICS_PROMPT_TEMPLATE,
subtopic_prompt_template: str = DEFAULT_SUBTOPICS_PROMPT_TEMPLATE,
open_qa_from_topics_prompt_template: str = DEFAULT_OPEN_QA_FROM_TOPICS_PROMPT_TEMPLATE,
revise_open_qa_prompt_template: str = DEFAULT_REVISE_OPEN_QA_PROMPT_TEMPLATE,
yaml_conversion_prompt_template: str = DEFAULT_YAML_CONVERSION_PROMPT_TEMPLATE,
base_model_kwargs: dict | None = None,
conversion_model_kwargs: dict | None = None,
additional_macro_topics: list[str] | None = None,
additional_subtopics: list[str] | None = None,
ignore_conversion_failure: bool = False,
combine_topics: bool = True,
) -> list[str]
run_python_pipeline(
n_macro_topics: str | int,
n_subtopics: str | int,
n_openlines: str | int,
model: str,
macro_topic_prompt_template: str = DEFAULT_PYTHON_MACRO_TOPICS_PROMPT_TEMPLATE,
subtopic_prompt_template: str = DEFAULT_PYTHON_SUBTOPICS_PROMPT_TEMPLATE,
python_problem_prompt_template: str = PYTHON_PROBLEM_BEGINNER_PROMPT_TEMPLATE,
yaml_conversion_prompt_template: str = DEFAULT_YAML_CONVERSION_PROMPT_TEMPLATE,
base_model_kwargs: dict | None = None,
conversion_model_kwargs: dict | None = None,
additional_macro_topics: list[str] | None = None,
additional_subtopics: list[str] | None = None,
ignore_conversion_failure: bool = False,
combine_topics: bool = True,
) -> list[str]
run_writing_pipeline(
topics: list[str],
text_material_types: list[str],
n_openlines: str | int,
n_revisions: str | int,
model: str,
writing_task_prompt_template: str = DEFAULT_WRITING_TASK_PROMPT_TEMPLATE,
revise_writing_task_prompt_template: str = DEFAULT_REVISE_WRITING_TASK_PROMPT_TEMPLATE,
yaml_conversion_prompt_template: str = DEFAULT_YAML_CONVERSION_PROMPT_TEMPLATE,
base_model_kwargs: dict | None = None,
conversion_model_kwargs: dict | None = None,
ignore_conversion_failure: bool = False,
) -> list[str]