core.tokenizers.vision.libraries.null_multimodal_tokenizer#

Module Contents#

Classes#

MegatronNullMultimodalTokenizer

Megatron Null Multimodal Tokenizer

API#

class core.tokenizers.vision.libraries.null_multimodal_tokenizer.MegatronNullMultimodalTokenizer(
vocab_size,
image_token=None,
image_token_id=None,
)#

Megatron Null Multimodal Tokenizer

Initialization

tokenize(text)#

Text tokenization.

Parameters:

text (str | list) – text to be tokenized.

Returns:

list of ids.

Return type:

list

detokenize(ids)#

Text detokenization.

Parameters:

ids (list) – token ids to be detokenized.

Returns:

detokenized text.

Return type:

str

offsets(ids: list[int], text: str) → list[int]#

Offsets calculation.

convert_tokens_to_ids(tokens)#

Convert tokens to IDs.

property vocab_size#

Vocab size.

property cls#

CLS token id.

property sep#

SEP token id.

property mask#

MASK token id.

property eod#

EOD token id.

property additional_special_tokens_ids#

Returns IDs of additional special tokens.