nv_ingest_api.internal.schemas.meta package#

Submodules#

nv_ingest_api.internal.schemas.meta.base_model_noext module#

class nv_ingest_api.internal.schemas.meta.base_model_noext.BaseModelNoExt[source]#

Bases: BaseModel

model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

nv_ingest_api.internal.schemas.meta.ingest_job_schema module#

class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestJobSchema(
*,
job_payload: JobPayloadSchema,
job_id: str | int,
tasks: List[IngestTaskSchema],
tracing_options: TracingOptionsSchema | None = None,
)[source]#

Bases: BaseModelNoExt

job_id: str | int#
job_payload: JobPayloadSchema#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

tasks: List[IngestTaskSchema]#
tracing_options: TracingOptionsSchema | None#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskAudioExtraction(
*,
auth_token: str | None = None,
grpc_endpoint: str | None = None,
http_endpoint: str | None = None,
infer_protocol: str | None = None,
function_id: str | None = None,
use_ssl: bool | None = None,
ssl_cert: str | None = None,
segment_audio: bool | None = None,
)[source]#

Bases: BaseModelNoExt

auth_token: str | None#
function_id: str | None#
grpc_endpoint: str | None#
http_endpoint: str | None#
infer_protocol: str | None#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

segment_audio: bool | None#
ssl_cert: str | None#
use_ssl: bool | None#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskCaptionSchema(
*,
api_key: str | None = None,
endpoint_url: str | None = None,
prompt: str | None = None,
model_name: str | None = None,
)[source]#

Bases: BaseModelNoExt

api_key: str | None#
endpoint_url: str | None#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_name: str | None#
prompt: str | None#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskChartExtraction(*, params: dict = <factory>)[source]#

Bases: BaseModelNoExt

model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: dict#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskDedupParams(*, filter: bool = False)[source]#

Bases: BaseModelNoExt

filter: bool#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskDedupSchema(
*,
content_type: ContentTypeEnum = ContentTypeEnum.IMAGE,
params: IngestTaskDedupParams = IngestTaskDedupParams(filter=False),
)[source]#

Bases: BaseModelNoExt

content_type: ContentTypeEnum#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: IngestTaskDedupParams#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskEmbedSchema(
*,
endpoint_url: str | None = None,
model_name: str | None = None,
api_key: str | None = None,
filter_errors: bool = False,
)[source]#

Bases: BaseModelNoExt

api_key: str | None#
endpoint_url: str | None#
filter_errors: bool#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

model_name: str | None#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskExtractSchema(
*,
document_type: DocumentTypeEnum,
method: str,
params: dict,
)[source]#

Bases: BaseModelNoExt

classmethod case_insensitive_document_type(v)[source]#
document_type: DocumentTypeEnum#
method: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: dict#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskFilterParamsSchema(
*,
min_size: int = 128,
max_aspect_ratio: float | int = 5.0,
min_aspect_ratio: float | int = 0.2,
filter: bool = False,
)[source]#

Bases: BaseModelNoExt

filter: bool#
max_aspect_ratio: float | int#
min_aspect_ratio: float | int#
min_size: int#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskFilterSchema(
*,
content_type: ContentTypeEnum = ContentTypeEnum.IMAGE,
params: IngestTaskFilterParamsSchema = IngestTaskFilterParamsSchema(min_size=128, max_aspect_ratio=5.0, min_aspect_ratio=0.2, filter=False),
)[source]#

Bases: BaseModelNoExt

content_type: ContentTypeEnum#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: IngestTaskFilterParamsSchema#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskInfographicExtraction(*, params: dict = <factory>)[source]#

Bases: BaseModelNoExt

model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: dict#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskSchema(
*,
type: TaskTypeEnum,
task_properties: IngestTaskSplitSchema | IngestTaskExtractSchema | IngestTaskStoreEmbedSchema | IngestTaskStoreSchema | IngestTaskEmbedSchema | IngestTaskCaptionSchema | IngestTaskDedupSchema | IngestTaskFilterSchema | IngestTaskVdbUploadSchema | IngestTaskAudioExtraction | IngestTaskTableExtraction | IngestTaskChartExtraction | IngestTaskInfographicExtraction,
raise_on_failure: bool = False,
)[source]#

Bases: BaseModelNoExt

classmethod case_insensitive_task_type(v)[source]#
classmethod check_task_properties_type(values)[source]#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

raise_on_failure: bool#
task_properties: IngestTaskSplitSchema | IngestTaskExtractSchema | IngestTaskStoreEmbedSchema | IngestTaskStoreSchema | IngestTaskEmbedSchema | IngestTaskCaptionSchema | IngestTaskDedupSchema | IngestTaskFilterSchema | IngestTaskVdbUploadSchema | IngestTaskAudioExtraction | IngestTaskTableExtraction | IngestTaskChartExtraction | IngestTaskInfographicExtraction#
type: TaskTypeEnum#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskSplitSchema(
*,
tokenizer: str | None = None,
chunk_size: Annotated[int, Gt(gt=0)] = 1024,
chunk_overlap: Annotated[int, Ge(ge=0)] = 150,
params: dict,
)[source]#

Bases: BaseModelNoExt

classmethod check_chunk_overlap(v, values, **kwargs)[source]#
chunk_overlap: Annotated[int, FieldInfo(annotation=NoneType, required=True, metadata=[Ge(ge=0)])]#
chunk_size: Annotated[int, FieldInfo(annotation=NoneType, required=True, metadata=[Gt(gt=0)])]#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: dict#
tokenizer: str | None#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskStoreEmbedSchema(*, params: dict)[source]#

Bases: BaseModelNoExt

model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: dict#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskStoreSchema(
*,
structured: bool = True,
images: bool = False,
method: str,
params: dict,
)[source]#

Bases: BaseModelNoExt

images: bool#
method: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: dict#
structured: bool#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskTableExtraction(*, params: dict = <factory>)[source]#

Bases: BaseModelNoExt

model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: dict#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.IngestTaskVdbUploadSchema(
*,
bulk_ingest: bool = False,
bulk_ingest_path: str | None = None,
params: dict | None = None,
filter_errors: bool = True,
)[source]#

Bases: BaseModelNoExt

bulk_ingest: bool#
bulk_ingest_path: str | None#
filter_errors: bool#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

params: dict | None#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.JobPayloadSchema(
*,
content: List[str | bytes],
source_name: List[str],
source_id: List[str | int],
document_type: List[str],
)[source]#

Bases: BaseModelNoExt

content: List[str | bytes]#
document_type: List[str]#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

source_id: List[str | int]#
source_name: List[str]#
class nv_ingest_api.internal.schemas.meta.ingest_job_schema.TracingOptionsSchema(
*,
trace: bool = False,
ts_send: int,
trace_id: str | None = None,
)[source]#

Bases: BaseModelNoExt

model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

trace: bool#
trace_id: str | None#
ts_send: int#
nv_ingest_api.internal.schemas.meta.ingest_job_schema.validate_ingest_job(
job_data: Dict[str, Any],
) → IngestJobSchema[source]#

Validates a dictionary representing an ingest_job using the IngestJobSchema.

Parameters:

- job_data: Dictionary representing an ingest job.

Returns:

- IngestJobSchema: The validated ingest job.

Raises:

- ValidationError: If the input data does not conform to the IngestJobSchema.

nv_ingest_api.internal.schemas.meta.metadata_schema module#

class nv_ingest_api.internal.schemas.meta.metadata_schema.AudioMetadataSchema(
*,
audio_transcript: str = '',
audio_type: str = '',
)[source]#

Bases: BaseModelNoExt

audio_transcript: str#
audio_type: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

class nv_ingest_api.internal.schemas.meta.metadata_schema.ChartMetadataSchema(
*,
caption: str = '',
table_format: TableFormatEnum,
table_content: str = '',
table_content_format: TableFormatEnum | str = '',
table_location: tuple = (0, 0, 0, 0),
table_location_max_dimensions: tuple = (0, 0),
uploaded_image_uri: str = '',
)[source]#

Bases: BaseModelNoExt

caption: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

table_content: str#
table_content_format: TableFormatEnum | str#
table_format: TableFormatEnum#
table_location: tuple#
table_location_max_dimensions: tuple#
uploaded_image_uri: str#
class nv_ingest_api.internal.schemas.meta.metadata_schema.ContentHierarchySchema(
*,
page_count: int = -1,
page: int = -1,
block: int = -1,
line: int = -1,
span: int = -1,
nearby_objects: NearbyObjectsSchema = NearbyObjectsSchema(text=NearbyObjectsSubSchema(content=[], bbox=[], type=[]), images=NearbyObjectsSubSchema(content=[], bbox=[], type=[]), structured=NearbyObjectsSubSchema(content=[], bbox=[], type=[])),
)[source]#

Bases: BaseModelNoExt

Schema for the extracted content hierarchy.

block: int#
line: int#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

nearby_objects: NearbyObjectsSchema#
page: int#
page_count: int#
span: int#
class nv_ingest_api.internal.schemas.meta.metadata_schema.ContentMetadataSchema(
*,
type: ContentTypeEnum,
description: str = '',
page_number: int = -1,
hierarchy: ContentHierarchySchema = ContentHierarchySchema(page_count=-1, page=-1, block=-1, line=-1, span=-1, nearby_objects=NearbyObjectsSchema(text=NearbyObjectsSubSchema(content=[], bbox=[], type=[]), images=NearbyObjectsSubSchema(content=[], bbox=[], type=[]), structured=NearbyObjectsSubSchema(content=[], bbox=[], type=[]))),
subtype: ContentTypeEnum | str = '',
start_time: int = -1,
end_time: int = -1,
)[source]#

Bases: BaseModelNoExt

Data extracted from a source; generally Text or Image.

description: str#
end_time: int#
hierarchy: ContentHierarchySchema#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

page_number: int#
start_time: int#
subtype: ContentTypeEnum | str#
type: ContentTypeEnum#
class nv_ingest_api.internal.schemas.meta.metadata_schema.ErrorMetadataSchema(
*,
task: TaskTypeEnum,
status: StatusEnum,
source_id: str = '',
error_msg: str,
)[source]#

Bases: BaseModelNoExt

error_msg: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

source_id: str#
status: StatusEnum#
task: TaskTypeEnum#
class nv_ingest_api.internal.schemas.meta.metadata_schema.ImageMetadataSchema(
*,
image_type: DocumentTypeEnum | str,
structured_image_type: ContentTypeEnum = ContentTypeEnum.NONE,
caption: str = '',
text: str = '',
image_location: tuple = (0, 0, 0, 0),
image_location_max_dimensions: tuple = (0, 0),
uploaded_image_url: str = '',
width: int = 0,
height: int = 0,
)[source]#

Bases: BaseModelNoExt

caption: str#
classmethod clamp_non_negative(v, field)[source]#
height: int#
image_location: tuple#
image_location_max_dimensions: tuple#
image_type: DocumentTypeEnum | str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

structured_image_type: ContentTypeEnum#
text: str#
uploaded_image_url: str#
classmethod validate_image_type(v)[source]#
width: int#
class nv_ingest_api.internal.schemas.meta.metadata_schema.InfoMessageMetadataSchema(
*,
task: TaskTypeEnum,
status: StatusEnum,
message: str,
filter: bool,
)[source]#

Bases: BaseModelNoExt

filter: bool#
message: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

status: StatusEnum#
task: TaskTypeEnum#
class nv_ingest_api.internal.schemas.meta.metadata_schema.MetadataSchema(
*,
content: str = '',
content_url: str = '',
embedding: List[float] | None = None,
source_metadata: SourceMetadataSchema | None = None,
content_metadata: ContentMetadataSchema | None = None,
audio_metadata: AudioMetadataSchema | None = None,
text_metadata: TextMetadataSchema | None = None,
image_metadata: ImageMetadataSchema | None = None,
table_metadata: TableMetadataSchema | None = None,
chart_metadata: ChartMetadataSchema | None = None,
error_metadata: ErrorMetadataSchema | None = None,
info_message_metadata: InfoMessageMetadataSchema | None = None,
debug_metadata: Dict[str, Any] | None = None,
raise_on_failure: bool = False,
)[source]#

Bases: BaseModelNoExt

audio_metadata: AudioMetadataSchema | None#
chart_metadata: ChartMetadataSchema | None#
classmethod check_metadata_type(values)[source]#
content: str#
content_metadata: ContentMetadataSchema | None#
content_url: str#
debug_metadata: Dict[str, Any] | None#
embedding: List[float] | None#
error_metadata: ErrorMetadataSchema | None#
image_metadata: ImageMetadataSchema | None#
info_message_metadata: InfoMessageMetadataSchema | None#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

raise_on_failure: bool#
source_metadata: SourceMetadataSchema | None#
table_metadata: TableMetadataSchema | None#
text_metadata: TextMetadataSchema | None#
class nv_ingest_api.internal.schemas.meta.metadata_schema.NearbyObjectsSchema(
*,
text: NearbyObjectsSubSchema = NearbyObjectsSubSchema(content=[], bbox=[], type=[]),
images: NearbyObjectsSubSchema = NearbyObjectsSubSchema(content=[], bbox=[], type=[]),
structured: NearbyObjectsSubSchema = NearbyObjectsSubSchema(content=[], bbox=[], type=[]),
)[source]#

Bases: BaseModelNoExt

Schema to hold types of related extracted objects.

images: NearbyObjectsSubSchema#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

structured: NearbyObjectsSubSchema#
text: NearbyObjectsSubSchema#
class nv_ingest_api.internal.schemas.meta.metadata_schema.NearbyObjectsSubSchema(
*,
content: List[str] = <factory>,
bbox: List[tuple] = <factory>,
type: List[str] = <factory>,
)[source]#

Bases: BaseModelNoExt

Schema to hold a related extracted object.

bbox: List[tuple]#
content: List[str]#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

type: List[str]#
class nv_ingest_api.internal.schemas.meta.metadata_schema.SourceMetadataSchema(
*,
source_name: str,
source_id: str,
source_location: str = '',
source_type: DocumentTypeEnum | str,
collection_id: str = '',
date_created: str = '2025-06-12T22:40:16.702243',
last_modified: str = '2025-06-12T22:40:16.702252',
summary: str = '',
partition_id: int = -1,
access_level: AccessLevelEnum | int = AccessLevelEnum.UNKNOWN,
)[source]#

Bases: BaseModelNoExt

Schema for the knowledge base file from which content and metadata are extracted.

access_level: AccessLevelEnum | int#
collection_id: str#
date_created: str#
last_modified: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

partition_id: int#
source_id: str#
source_location: str#
source_name: str#
source_type: DocumentTypeEnum | str#
summary: str#
classmethod validate_fields(field_value)[source]#
class nv_ingest_api.internal.schemas.meta.metadata_schema.TableMetadataSchema(
*,
caption: str = '',
table_format: TableFormatEnum,
table_content: str = '',
table_content_format: TableFormatEnum | str = '',
table_location: tuple = (0, 0, 0, 0),
table_location_max_dimensions: tuple = (0, 0),
uploaded_image_uri: str = '',
)[source]#

Bases: BaseModelNoExt

caption: str#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

table_content: str#
table_content_format: TableFormatEnum | str#
table_format: TableFormatEnum#
table_location: tuple#
table_location_max_dimensions: tuple#
uploaded_image_uri: str#
class nv_ingest_api.internal.schemas.meta.metadata_schema.TextMetadataSchema(
*,
text_type: TextTypeEnum,
summary: str = '',
keywords: str | List[str] | Dict = '',
language: LanguageEnum = 'en',
text_location: tuple = (0, 0, 0, 0),
text_location_max_dimensions: tuple = (0, 0, 0, 0),
)[source]#

Bases: BaseModelNoExt

keywords: str | List[str] | Dict#
language: LanguageEnum#
model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#

Configuration for the model; should be a dictionary conforming to pydantic.config.ConfigDict.

summary: str#
text_location: tuple#
text_location_max_dimensions: tuple#
text_type: TextTypeEnum#
nv_ingest_api.internal.schemas.meta.metadata_schema.validate_metadata(
metadata: Dict[str, Any],
) → MetadataSchema[source]#

Validates the given metadata dictionary against the MetadataSchema.

Parameters:

- metadata: A dictionary representing metadata to be validated.

Returns:

- An instance of MetadataSchema if validation is successful.

Raises:

- ValidationError: If the metadata does not conform to the schema.

Module contents#