classifiers.aegis#

Module Contents#

Classes#

Data#

API#

classifiers.aegis.ACCESS_ERROR_MESSAGE = <Multiline-String>#
classifiers.aegis.AEGIS_LABELS#

['unknown', 'safe', 'O1', 'O2', 'O3', 'O4', 'O5', 'O6', 'O7', 'O8', 'O9', 'O10', 'O11', 'O12', 'O13'…

class classifiers.aegis.AegisClassifier(
aegis_variant: str = 'nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0',
token: str | bool | None = None,
filter_by: list[str] | None = None,
batch_size: int = 64,
text_field: str = 'text',
pred_column: str = 'aegis_pred',
raw_pred_column: str = '_aegis_raw_pred',
keep_raw_pred: bool = False,
max_chars: int = 6000,
device_type: str = 'cuda',
autocast: bool = True,
max_mem_gb: int | None = None,
)#

Bases: nemo_curator.classifiers.base.DistributedDataClassifier

Initialization

class classifiers.aegis.AegisConfig#
add_instruction_data_guard: bool#

False

dtype: torch.dtype#

None

instruction_data_guard_path: str#

'nvidia/instruction-data-guard'

max_length: int#

4096

peft_model_name_or_path: str#

None

pretrained_model_name_or_path: str#

'meta-llama/LlamaGuard-7b'

token: str | bool | None#

None

class classifiers.aegis.AegisHFModel(
config: classifiers.aegis.AegisConfig,
max_mem_gb: int | None = None,
)#

Bases: crossfit.backend.torch.hf.model.HFModel

Initialization

load_cfg() -> transformers.AutoConfig#
load_config() -> transformers.AutoConfig#
load_model(device: str = 'cuda') -> classifiers.aegis.AegisModel#
load_tokenizer() -> transformers.AutoTokenizer#
max_seq_length() -> int#

class classifiers.aegis.AegisModel(
pretrained_model_name_or_path: str,
peft_model_name_or_path: str,
dtype: torch.dtype,
token: str | bool | None,
add_instruction_data_guard: bool = False,
autocast: bool = False,
)#

Bases: torch.nn.Module

Initialization

forward(batch: dict[str, torch.Tensor]) -> torch.Tensor#

class classifiers.aegis.InstructionDataGuardClassifier(
token: str | bool | None = None,
batch_size: int = 64,
text_field: str = 'text',
pred_column: str = 'is_poisoned',
prob_column: str = 'instruction_data_guard_poisoning_score',
max_chars: int = 6000,
autocast: bool = True,
device_type: str = 'cuda',
max_mem_gb: int | None = None,
)#

Bases: nemo_curator.classifiers.base.DistributedDataClassifier

Initialization

class classifiers.aegis.InstructionDataGuardNet(input_dim: int, dropout: float = 0.7)#

Bases: torch.nn.Module, huggingface_hub.PyTorchModelHubMixin

Initialization

forward(x: torch.Tensor) -> torch.Tensor#