classifiers.content_type#

Module Contents#

Classes#

Data#

API#

classifiers.content_type.CONTENT_TYPE_IDENTIFIER#

'nvidia/content-type-classifier-deberta'

class classifiers.content_type.ContentTypeClassifier(
filter_by: list[str] | None = None,
batch_size: int = 256,
text_field: str = 'text',
pred_column: str = 'content_pred',
prob_column: str | None = None,
max_chars: int = 5000,
device_type: str = 'cuda',
autocast: bool = True,
max_mem_gb: int | None = None,
)#

Bases: nemo_curator.classifiers.base.DistributedDataClassifier

Initialization

class classifiers.content_type.ContentTypeModel(
config: classifiers.content_type.ContentTypeModelConfig,
autocast: bool = False,
max_mem_gb: int | None = None,
)#

Bases: crossfit.backend.torch.hf.model.HFModel

Initialization

load_config() -> transformers.AutoConfig#

load_model(
device: str = 'cuda',
) -> nemo_curator.classifiers.base.HFDeberta#

load_tokenizer() -> transformers.AutoTokenizer#

class classifiers.content_type.ContentTypeModelConfig#
fc_dropout: float#

0.2

max_len: int#

1024

model: str#

'microsoft/deberta-v3-base'