classifiers.domain#

Module Contents#

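Classes#

DomainClassifier
DomainModel
DomainModelConfig
MultilingualDomainClassifier

Data#

DOMAIN_BASE_MODEL
DOMAIN_IDENTIFIER
MULTILINGUAL_DOMAIN_BASE_MODEL
MULTILINGUAL_DOMAIN_IDENTIFIER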

API#

classifiers.domain.DOMAIN_BASE_MODEL#

'microsoft/deberta-v3-base'

classifiers.domain.DOMAIN_IDENTIFIER#

'nvidia/domain-classifier'

class classifiers.domain.DomainClassifier(
filter_by: list[str] | None = None,
batch_size: int = 256,
text_field: str = 'text',
pred_column: str = 'domain_pred',
prob_column: str | None = None,
max_chars: int = 2000,
device_type: str = 'cuda',
autocast: bool = True,
max_mem_gb: int | None = None,
)#

Bases: classifiers.domain._DomainClassifier

Initialization

class classifiers.domain.DomainModel(
config: classifiers.domain.DomainModelConfig,
autocast: bool = False,
max_mem_gb: int | None = None,
)#

Bases: crossfit.backend.torch.hf.model.HFModel

Initialization

load_config() → transformers.AutoConfig#

load_model(
device: str = 'cuda',
) → nemo_curator.classifiers.base.HFDeberta#

load_tokenizer() → transformers.AutoTokenizer#
class classifiers.domain.DomainModelConfig#
base_model: str#

None

fc_dropout: float#

0.2

identifier: str#

None

max_len: int#

512

classifiers.domain.MULTILINGUAL_DOMAIN_BASE_MODEL#

'microsoft/mdeberta-v3-base'

classifiers.domain.MULTILINGUAL_DOMAIN_IDENTIFIER#

'nvidia/multilingual-domain-classifier'

class classifiers.domain.MultilingualDomainClassifier(
filter_by: list[str] | None = None,
batch_size: int = 256,
text_field: str = 'text',
pred_column: str = 'domain_pred',
prob_column: str | None = None,
max_chars: int = 2000,
device_type: str = 'cuda',
autocast: bool = True,
max_mem_gb: int | None = None,
)#

Bases: classifiers.domain._DomainClassifier

Initialization