nemo_automodel._transformers.auto_tokenizer#

Module Contents#

Classes#

Functions#

API#

class nemo_automodel._transformers.auto_tokenizer.NeMoAutoTokenizer(
base_tokenizer,
*,
add_bos_token: bool,
add_eos_token: bool,
)#

Initialization

classmethod from_pretrained(
pretrained_model_name_or_path,
*args,
force_hf=False,
add_bos_token=True,
add_eos_token=True,
**kwargs,
)#

Load the Hugging Face tokenizer via AutoTokenizer and (optionally) wrap it so that BOS/EOS tokens are added.

Note: some tokenizers (e.g. GPT2Tokenizer) have pre-existing issues where the BOS/EOS tokens are not added; the optional wrapping described above exists to compensate for this.

property add_bos_token#
property add_eos_token#
__getattr__(name)#
__setattr__(name, value)#
__call__(*args, **kwargs)#
encode(*args, **kwargs)#
nemo_automodel._transformers.auto_tokenizer._add_token(tokenized, value, position, key)#