filters.synthetic

Module Contents

Classes

Functions

API

class filters.synthetic.AnswerabilityFilter(
base_url: str,
api_key: str,
model: str,
answerability_system_prompt: str,
answerability_user_prompt_template: str,
num_criteria: int,
text_fields: list[str] | None = None,
)

Bases: nemo_curator.filters.doc_filter.DocumentFilter

Initialization

keep_document(scores: pandas.Series) -> pandas.Series
score_document(df: pandas.DataFrame) -> pandas.Series
class filters.synthetic.EasinessFilter(
base_url: str,
api_key: str,
model: str,
percentile: float = 0.7,
truncate: str = 'NONE',
batch_size: int = 1,
text_fields: list[str] | None = None,
)

Bases: nemo_curator.filters.doc_filter.DocumentFilter

Initialization

keep_document(scores: pandas.Series) -> pandas.Series
score_document(df: pandas.DataFrame) -> pandas.Series
filters.synthetic.create_client(base_url: str, api_key: str) -> openai.OpenAI