nemo_automodel.components.models.nemotron_parse.model#

Module Contents#

Classes#

NemotronParseTextConfig

Configuration class for NemotronParse text decoder (mBART-based).

NemotronParseEncoderConfig

Configuration class for NemotronParse vision encoder (RADIO-based).

NemotronParseConfig

Configuration class for NemotronParse model.

NemotronParseDecoder

Transformer decoder consisting of config.decoder_layers layers.

RadioWithNeck

Vision encoder using RADIO model with custom neck.

NemotronParsePreTrainedModel

Abstract class to handle weights initialization.

NemotronParseForConditionalGeneration

NemotronParse model for conditional generation tasks.

Data#

API#

class nemo_automodel.components.models.nemotron_parse.model.NemotronParseTextConfig(
vocab_size: int = 250027,
d_model: int = 1024,
encoder_layers: int = 12,
decoder_layers: int = 12,
encoder_attention_heads: int = 16,
decoder_attention_heads: int = 16,
decoder_ffn_dim: int = 4096,
encoder_ffn_dim: int = 4096,
activation_function: str = 'gelu',
dropout: float = 0.1,
attention_dropout: float = 0.0,
activation_dropout: float = 0.0,
classifier_dropout: float = 0.0,
init_std: float = 0.02,
encoder_layerdrop: float = 0.0,
decoder_layerdrop: float = 0.0,
scale_embedding: bool = False,
use_cache: bool = True,
num_labels: int = 3,
forced_eos_token_id: int = 2,
add_cross_attention: bool = True,
is_decoder: bool = True,
max_sequence_length: int = 9000,
**kwargs,
)#

Bases: transformers.configuration_utils.PretrainedConfig

Configuration class for NemotronParse text decoder (mBART-based).

Initialization

model_type#

‘nemotron_parse_text’

class nemo_automodel.components.models.nemotron_parse.model.NemotronParseEncoderConfig(
patch_size: int = 16,
max_resolution: int = 2048,
preferred_resolution: List[int] = None,
torch_dtype: str = 'bfloat16',
**kwargs,
)#

Bases: transformers.configuration_utils.PretrainedConfig

Configuration class for NemotronParse vision encoder (RADIO-based).

Initialization

model_type#

‘nemotron_parse_encoder’

class nemo_automodel.components.models.nemotron_parse.model.NemotronParseConfig(
encoder: Optional[dict] = None,
decoder: Optional[dict] = None,
tie_word_embeddings: bool = False,
decoder_start_token_id: int = 2,
pad_token_id: int = 1,
eos_token_id: int = 2,
bos_token_id: int = 0,
image_size: List[int] = None,
is_encoder_decoder: bool = True,
max_sequence_length: int = 9000,
**kwargs,
)#

Bases: transformers.configuration_utils.PretrainedConfig

Configuration class for NemotronParse model.

Initialization

model_type#

‘nemotron_parse’

is_composition#

True

to_dict()#
class nemo_automodel.components.models.nemotron_parse.model.NemotronParseDecoder(
config: transformers.models.mbart.modeling_mbart.MBartConfig,
embed_tokens: Optional[torch.nn.Embedding] = None,
)#

Bases: transformers.models.mbart.modeling_mbart.MBartPreTrainedModel

Transformer decoder consisting of config.decoder_layers layers.

Initialization

get_input_embeddings()#
set_input_embeddings(value)#
forward(
input_ids: Optional[torch.LongTensor] = None,
attention_mask: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.LongTensor] = None,
head_mask: Optional[torch.Tensor] = None,
cross_attn_head_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, transformers.models.mbart.modeling_mbart.BaseModelOutputWithPastAndCrossAttentions]#
class nemo_automodel.components.models.nemotron_parse.model.RadioWithNeck(config)#

Bases: torch.nn.Module

Vision encoder using RADIO model with custom neck.

Initialization

forward(
pixel_values,
output_attentions=False,
output_hidden_states=False,
return_dict=False,
**kwargs,
)#
class nemo_automodel.components.models.nemotron_parse.model.NemotronParsePreTrainedModel#

Bases: transformers.PreTrainedModel

Abstract class to handle weights initialization.

config_class#

None

base_model_prefix#

‘vision_encoder_decoder’

main_input_name#

‘pixel_values’

supports_gradient_checkpointing#

True

_no_split_modules#

[‘RadioWithNeck’, ‘MBartDecoder’]

_skip_keys_device_placement#

‘past_key_values’

_init_weights(module)#
class nemo_automodel.components.models.nemotron_parse.model.NemotronParseForConditionalGeneration(
config: nemo_automodel.components.models.nemotron_parse.model.NemotronParseConfig,
**kwargs,
)#

Bases: nemo_automodel.components.models.nemotron_parse.model.NemotronParsePreTrainedModel, transformers.GenerationMixin

NemotronParse model for conditional generation tasks.

Initialization

get_encoder()#
get_decoder()#
get_output_embeddings()#
set_output_embeddings(new_embeddings)#
get_input_embeddings()#
forward(
pixel_values: Optional[torch.FloatTensor] = None,
decoder_input_ids: Optional[torch.LongTensor] = None,
decoder_attention_mask: Optional[torch.BoolTensor] = None,
encoder_outputs: Optional[Tuple[torch.FloatTensor]] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
**kwargs,
) -> Union[Tuple[torch.FloatTensor], transformers.modeling_outputs.Seq2SeqLMOutput]#
prepare_decoder_input_ids_from_labels(labels: torch.Tensor)#
_reorder_cache(past_key_values, beam_idx)#
nemo_automodel.components.models.nemotron_parse.model.ModelClass#

None