classifiers.aegis
Module Contents
Classes
Data
API
- classifiers.aegis.ACCESS_ERROR_MESSAGE = <Multiline-String>
- classifiers.aegis.AEGIS_LABELS
['unknown', 'safe', 'O1', 'O2', 'O3', 'O4', 'O5', 'O6', 'O7', 'O8', 'O9', 'O10', 'O11', 'O12', 'O13'…
- class classifiers.aegis.AegisClassifier(
- aegis_variant: str = 'nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0',
- token: str | bool | None = None,
- filter_by: list[str] | None = None,
- batch_size: int = 64,
- text_field: str = 'text',
- pred_column: str = 'aegis_pred',
- raw_pred_column: str = '_aegis_raw_pred',
- keep_raw_pred: bool = False,
- max_chars: int = 6000,
- device_type: str = 'cuda',
- autocast: bool = True,
- max_mem_gb: int | None = None,
)
Bases:
nemo_curator.classifiers.base.DistributedDataClassifier
Initialization
- class classifiers.aegis.AegisConfig
- add_instruction_data_guard: bool
False
- dtype: torch.dtype
None
- instruction_data_guard_path: str
'nvidia/instruction-data-guard'
- max_length: int
4096
- peft_model_name_or_path: str
None
- pretrained_model_name_or_path: str
'meta-llama/LlamaGuard-7b'
- token: str | bool | None
None
- class classifiers.aegis.AegisHFModel(
- config: classifiers.aegis.AegisConfig,
- max_mem_gb: int | None = None,
)
Bases:
crossfit.backend.torch.hf.model.HFModel
Initialization
- load_cfg() → transformers.AutoConfig
- load_config() → transformers.AutoConfig
- load_model(device: str = 'cuda') → classifiers.aegis.AegisModel
- load_tokenizer() → transformers.AutoTokenizer
- max_seq_length() → int
- class classifiers.aegis.AegisModel(
- pretrained_model_name_or_path: str,
- peft_model_name_or_path: str,
- dtype: torch.dtype,
- token: str | bool | None,
- add_instruction_data_guard: bool = False,
- autocast: bool = False,
)
Bases:
torch.nn.Module
Initialization
- forward(batch: dict[str, torch.Tensor]) → torch.Tensor
- class classifiers.aegis.InstructionDataGuardClassifier(
- token: str | bool | None = None,
- batch_size: int = 64,
- text_field: str = 'text',
- pred_column: str = 'is_poisoned',
- prob_column: str = 'instruction_data_guard_poisoning_score',
- max_chars: int = 6000,
- autocast: bool = True,
- device_type: str = 'cuda',
- max_mem_gb: int | None = None,
)
Bases:
nemo_curator.classifiers.base.DistributedDataClassifier
Initialization