nv_ingest_api.internal.schemas.transform package#

Submodules#

nv_ingest_api.internal.schemas.transform.transform_image_caption_schema module#

class nv_ingest_api.internal.schemas.transform.transform_image_caption_schema.ImageCaptionExtractionSchema(
*,
api_key: str = 'api_key',
endpoint_url: str = 'https://integrate.api.nvidia.com/v1/chat/completions',
prompt: str = 'Caption the content of this image:',
model_name: str = 'nvidia/llama-3.1-nemotron-nano-vl-8b-v1',
raise_on_failure: bool = False,
)[source]#

Bases: BaseModel

api_key: str#
endpoint_url: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_name: str#
prompt: str#
raise_on_failure: bool#
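
The snippet below is a minimal usage sketch based on the signature above; the api_key value and custom prompt are placeholders, not values taken from the library.

```python
from nv_ingest_api.internal.schemas.transform.transform_image_caption_schema import (
    ImageCaptionExtractionSchema,
)

# All fields have defaults, so an empty call validates.
config = ImageCaptionExtractionSchema()

# Override selected fields; unknown keys raise a ValidationError because
# model_config sets extra='forbid'.
custom = ImageCaptionExtractionSchema(
    api_key="nvapi-...",  # placeholder, not a real key
    prompt="Describe this chart in one sentence:",
    raise_on_failure=True,
)
print(custom.model_dump())
```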

nv_ingest_api.internal.schemas.transform.transform_image_filter_schema module#

class nv_ingest_api.internal.schemas.transform.transform_image_filter_schema.ImageFilterSchema(
*,
raise_on_failure: Annotated[bool, Strict(strict=True)] = False,
cpu_only: Annotated[bool, Strict(strict=True)] = False,
)[source]#

Bases: BaseModel

cpu_only: Annotated[bool, Strict(strict=True)]#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

raise_on_failure: Annotated[bool, Strict(strict=True)]#
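
A short sketch of the strict-boolean behavior implied by the Strict(strict=True) annotations above; it assumes standard Pydantic v2 validation and uses no values beyond the field names listed here.

```python
from pydantic import ValidationError

from nv_ingest_api.internal.schemas.transform.transform_image_filter_schema import (
    ImageFilterSchema,
)

config = ImageFilterSchema(raise_on_failure=True, cpu_only=False)

# Strict(strict=True) disables type coercion, so an integer is rejected
# where a non-strict bool field would accept it.
try:
    ImageFilterSchema(cpu_only=1)
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # e.g. "bool_type"
```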

nv_ingest_api.internal.schemas.transform.transform_text_embedding_schema module#

class nv_ingest_api.internal.schemas.transform.transform_text_embedding_schema.TextEmbeddingSchema(
*,
api_key: str = 'api_key',
batch_size: int = 4,
embedding_model: str = 'nvidia/llama-3.2-nv-embedqa-1b-v2',
embedding_nim_endpoint: str = 'http://embedding:8000/v1',
encoding_format: str = 'float',
httpx_log_level: LogLevel = LogLevel.WARNING,
input_type: str = 'passage',
raise_on_failure: bool = False,
truncate: str = 'END',
text_elements_modality: str = 'text',
image_elements_modality: str = 'text',
structured_elements_modality: str = 'text',
audio_elements_modality: str = 'text',
)[source]#

Bases: BaseModel

api_key: str#
audio_elements_modality: str#
batch_size: int#
embedding_model: str#
embedding_nim_endpoint: str#
encoding_format: str#
httpx_log_level: LogLevel#
image_elements_modality: str#
input_type: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

raise_on_failure: bool#
structured_elements_modality: str#
text_elements_modality: str#
truncate: str#
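
An illustrative construction of the schema; the endpoint, API key, and batch size shown are placeholders, and everything else falls back to the defaults listed above.

```python
from pydantic import ValidationError

from nv_ingest_api.internal.schemas.transform.transform_text_embedding_schema import (
    TextEmbeddingSchema,
)

# Placeholder endpoint and key; every other field keeps its default.
config = TextEmbeddingSchema(
    api_key="nvapi-...",
    embedding_nim_endpoint="http://localhost:8000/v1",
    batch_size=8,
)

# extra='forbid' turns a misspelled field name into an immediate error
# instead of a silently ignored setting.
try:
    TextEmbeddingSchema(embeding_model="nvidia/llama-3.2-nv-embedqa-1b-v2")  # note the typo
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "extra_forbidden"
```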

nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema module#

class nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema.TextSplitterSchema(
*,
tokenizer: str | None = None,
chunk_size: Annotated[int, Gt(gt=0)] = 1024,
chunk_overlap: Annotated[int, Ge(ge=0)] = 150,
raise_on_failure: bool = False,
)[source]#

Bases: BaseModel

classmethod check_chunk_overlap(v, values)[source]#
chunk_overlap: int#
chunk_size: int#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

raise_on_failure: bool#
tokenizer: str | None#
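
A usage sketch assuming the constraints shown above (chunk_size > 0, chunk_overlap >= 0); the tokenizer name is purely illustrative, and the exact rule enforced by check_chunk_overlap is an assumption (the overlap is expected to stay below chunk_size).

```python
from pydantic import ValidationError

from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import (
    TextSplitterSchema,
)

# Hypothetical tokenizer name used purely for illustration.
config = TextSplitterSchema(
    tokenizer="meta-llama/Llama-3.2-1B",
    chunk_size=512,
    chunk_overlap=64,
)

# chunk_size must be > 0 and chunk_overlap must be >= 0; check_chunk_overlap
# presumably also requires the overlap to stay below chunk_size, so this
# configuration is expected to be rejected.
try:
    TextSplitterSchema(chunk_size=100, chunk_overlap=200)
except ValidationError as exc:
    print("rejected:", exc.error_count(), "error(s)")
```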

Module contents#