nv_ingest_api.internal.enums package#

Submodules#

nv_ingest_api.internal.enums.common module#

class nv_ingest_api.internal.enums.common.AccessLevelEnum(value)[source]#

Bases: int, Enum

Note

This enum is reserved for future use and currently has no functional use case.

Enum for representing different access levels.

LEVEL_1#

Represents access level 1.

Type:

int

LEVEL_2#

Represents access level 2.

Type:

int

LEVEL_3#

Represents access level 3.

Type:

int

LEVEL_1: int = 1#
LEVEL_2: int = 2#
LEVEL_3: int = 3#
UNKNOWN: int = -1#
class nv_ingest_api.internal.enums.common.ContentDescriptionEnum(value)[source]#

Bases: str, Enum

Enum of standard descriptions for content extracted from different source types.

DOCX_IMAGE#

Description for image extracted from DOCX document.

Type:

str

DOCX_TABLE#

Description for structured table extracted from DOCX document.

Type:

str

DOCX_TEXT#

Description for unstructured text from DOCX document.

Type:

str

PDF_CHART#

Description for structured chart extracted from PDF document.

Type:

str

PDF_IMAGE#

Description for image extracted from PDF document.

Type:

str

PDF_INFOGRAPHIC#

Description for structured infographic extracted from PDF document.

Type:

str

PDF_TABLE#

Description for structured table extracted from PDF document.

Type:

str

PDF_TEXT#

Description for unstructured text from PDF document.

Type:

str

PPTX_IMAGE#

Description for image extracted from PPTX presentation.

Type:

str

PPTX_TABLE#

Description for structured table extracted from PPTX presentation.

Type:

str

PPTX_TEXT#

Description for unstructured text from PPTX presentation.

Type:

str

DOCX_IMAGE: str = 'Image extracted from DOCX document.'#
DOCX_TABLE: str = 'Structured table extracted from DOCX document.'#
DOCX_TEXT: str = 'Unstructured text from DOCX document.'#
PDF_CHART: str = 'Structured chart extracted from PDF document.'#
PDF_IMAGE: str = 'Image extracted from PDF document.'#
PDF_INFOGRAPHIC: str = 'Structured infographic extracted from PDF document.'#
PDF_TABLE: str = 'Structured table extracted from PDF document.'#
PDF_TEXT: str = 'Unstructured text from PDF document.'#
PPTX_IMAGE: str = 'Image extracted from PPTX presentation.'#
PPTX_TABLE: str = 'Structured table extracted from PPTX presentation.'#
PPTX_TEXT: str = 'Unstructured text from PPTX presentation.'#
class nv_ingest_api.internal.enums.common.ContentTypeEnum(value)[source]#

Bases: str, Enum

Enum for representing various content types.

Note: Content type declares the broad category of the content, such as text, image, audio, etc. This is not equivalent to the Document type, which is a specific file format.

AUDIO#

Represents audio content.

Type:

str

EMBEDDING#

Represents embedding content.

Type:

str

IMAGE#

Represents image content.

Type:

str

INFO_MSG#

Represents an informational message.

Type:

str

STRUCTURED#

Represents structured content.

Type:

str

TEXT#

Represents text content.

Type:

str

UNSTRUCTURED#

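Represents unstructured content. Note: UNSTRUCTURED does not appear among the member values listed for this enum, so this entry may be outdated.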

Type:

str

VIDEO#

Represents video content.

Type:

str

AUDIO: str = 'audio'#
CHART: str = 'chart'#
EMBEDDING: str = 'embedding'#
IMAGE: str = 'image'#
INFOGRAPHIC: str = 'infographic'#
INFO_MSG: str = 'info_message'#
NONE: str = 'none'#
STRUCTURED: str = 'structured'#
TABLE: str = 'table'#
TEXT: str = 'text'#
UNKNOWN: str = 'unknown'#
VIDEO: str = 'video'#
class nv_ingest_api.internal.enums.common.DocumentTypeEnum(value)[source]#

Bases: str, Enum

Enum for representing various document file types.

Note: Document type refers to the specific file format of the content, such as PDF, DOCX, etc. This is not equivalent to the Content type, which is a broad category of the content.

BMP#

BMP image format.

Type:

str

DOCX#

Microsoft Word document format.

Type:

str

HTML#

HTML document.

Type:

str

JPEG#

JPEG image format.

Type:

str

PDF#

PDF document format.

Type:

str

PNG#

PNG image format.

Type:

str

PPTX#

PowerPoint presentation format.

Type:

str

SVG#

SVG image format.

Type:

str

TIFF#

TIFF image format.

Type:

str

TXT#

Plain text file.

Type:

str

MP3#

MP3 audio format.

Type:

str

WAV#

WAV audio format.

Type:

str

BMP: str = 'bmp'#
DOCX: str = 'docx'#
HTML: str = 'html'#
JPEG: str = 'jpeg'#
MD: str = 'text'#
MP3: str = 'mp3'#
PDF: str = 'pdf'#
PNG: str = 'png'#
PPTX: str = 'pptx'#
SVG: str = 'svg'#
TIFF: str = 'tiff'#
TXT: str = 'text'#
UNKNOWN: str = 'unknown'#
WAV: str = 'wav'#
class nv_ingest_api.internal.enums.common.LanguageEnum(value)[source]#

Bases: str, Enum

Enum for representing various language codes.

AF#

Afrikaans language code.

Type:

str

AR#

Arabic language code.

Type:

str

BG#

Bulgarian language code.

Type:

str

BN#

Bengali language code.

Type:

str

CA#

Catalan language code.

Type:

str

CS#

Czech language code.

Type:

str

CY#

Welsh language code.

Type:

str

DA#

Danish language code.

Type:

str

DE#

German language code.

Type:

str

EL#

Greek language code.

Type:

str

EN#

English language code.

Type:

str

ES#

Spanish language code.

Type:

str

ET#

Estonian language code.

Type:

str

FA#

Persian language code.

Type:

str

FI#

Finnish language code.

Type:

str

FR#

French language code.

Type:

str

GU#

Gujarati language code.

Type:

str

HE#

Hebrew language code.

Type:

str

HI#

Hindi language code.

Type:

str

HR#

Croatian language code.

Type:

str

HU#

Hungarian language code.

Type:

str

ID#

Indonesian language code.

Type:

str

IT#

Italian language code.

Type:

str

JA#

Japanese language code.

Type:

str

KN#

Kannada language code.

Type:

str

KO#

Korean language code.

Type:

str

LT#

Lithuanian language code.

Type:

str

LV#

Latvian language code.

Type:

str

MK#

Macedonian language code.

Type:

str

ML#

Malayalam language code.

Type:

str

MR#

Marathi language code.

Type:

str

NE#

Nepali language code.

Type:

str

NL#

Dutch language code.

Type:

str

NO#

Norwegian language code.

Type:

str

PA#

Punjabi language code.

Type:

str

PL#

Polish language code.

Type:

str

PT#

Portuguese language code.

Type:

str

RO#

Romanian language code.

Type:

str

RU#

Russian language code.

Type:

str

SK#

Slovak language code.

Type:

str

SL#

Slovenian language code.

Type:

str

SO#

Somali language code.

Type:

str

SQ#

Albanian language code.

Type:

str

SV#

Swedish language code.

Type:

str

SW#

Swahili language code.

Type:

str

TA#

Tamil language code.

Type:

str

TE#

Telugu language code.

Type:

str

TH#

Thai language code.

Type:

str

TL#

Tagalog language code.

Type:

str

TR#

Turkish language code.

Type:

str

UK#

Ukrainian language code.

Type:

str

UR#

Urdu language code.

Type:

str

VI#

Vietnamese language code.

Type:

str

ZH_CN#

Chinese (Simplified) language code.

Type:

str

ZH_TW#

Chinese (Traditional) language code.

Type:

str

UNKNOWN#

Represents an unknown language.

Type:

str

AF: str = 'af'#
AR: str = 'ar'#
BG: str = 'bg'#
BN: str = 'bn'#
CA: str = 'ca'#
CS: str = 'cs'#
CY: str = 'cy'#
DA: str = 'da'#
DE: str = 'de'#
EL: str = 'el'#
EN: str = 'en'#
ES: str = 'es'#
ET: str = 'et'#
FA: str = 'fa'#
FI: str = 'fi'#
FR: str = 'fr'#
GU: str = 'gu'#
HE: str = 'he'#
HI: str = 'hi'#
HR: str = 'hr'#
HU: str = 'hu'#
ID: str = 'id'#
IT: str = 'it'#
JA: str = 'ja'#
KN: str = 'kn'#
KO: str = 'ko'#
LT: str = 'lt'#
LV: str = 'lv'#
MK: str = 'mk'#
ML: str = 'ml'#
MR: str = 'mr'#
NE: str = 'ne'#
NL: str = 'nl'#
NO: str = 'no'#
PA: str = 'pa'#
PL: str = 'pl'#
PT: str = 'pt'#
RO: str = 'ro'#
RU: str = 'ru'#
SK: str = 'sk'#
SL: str = 'sl'#
SO: str = 'so'#
SQ: str = 'sq'#
SV: str = 'sv'#
SW: str = 'sw'#
TA: str = 'ta'#
TE: str = 'te'#
TH: str = 'th'#
TL: str = 'tl'#
TR: str = 'tr'#
UK: str = 'uk'#
UNKNOWN: str = 'unknown'#
UR: str = 'ur'#
VI: str = 'vi'#
ZH_CN: str = 'zh-cn'#
ZH_TW: str = 'zh-tw'#
classmethod has_value(value: Any) → bool[source]#

Check if the enum contains the given value.

Parameters:

value (Any) – The value to check against the enum members.

Returns:

True if the value exists in the enum, False otherwise.

Return type:

bool

class nv_ingest_api.internal.enums.common.StatusEnum(value)[source]#

Bases: str, Enum

Enum for representing status messages.

ERROR#

Represents an error status.

Type:

str

SUCCESS#

Represents a success status.

Type:

str

ERROR: str = 'error'#
SUCCESS: str = 'success'#
class nv_ingest_api.internal.enums.common.TableFormatEnum(value)[source]#

Bases: str, Enum

Enum for representing table formats.

HTML#

Represents HTML table format.

Type:

str

IMAGE#

Represents image table format.

Type:

str

LATEX#

Represents LaTeX table format.

Type:

str

MARKDOWN#

Represents Markdown table format.

Type:

str

PSEUDO_MARKDOWN#

Represents pseudo Markdown table format.

Type:

str

SIMPLE#

Represents simple table format.

Type:

str

HTML: str = 'html'#
IMAGE: str = 'image'#
LATEX: str = 'latex'#
MARKDOWN: str = 'markdown'#
PSEUDO_MARKDOWN: str = 'pseudo_markdown'#
SIMPLE: str = 'simple'#
class nv_ingest_api.internal.enums.common.TaskTypeEnum(value)[source]#

Bases: str, Enum

Enum for representing various task types.

CAPTION#

Represents a caption task.

Type:

str

DEDUP#

Represents a deduplication task.

Type:

str

EMBED#

Represents an embedding task.

Type:

str

EXTRACT#

Represents an extraction task.

Type:

str

FILTER#

Represents a filtering task.

Type:

str

SPLIT#

Represents a splitting task.

Type:

str

STORE#

Represents a storing task.

Type:

str

STORE_EMBEDDING#

Represents a task for storing embeddings.

Type:

str

VDB_UPLOAD#

Represents a task for uploading to a vector database.

Type:

str

AUDIO_DATA_EXTRACT#

Represents a task for extracting audio data.

Type:

str

TABLE_DATA_EXTRACT#

Represents a task for extracting table data.

Type:

str

CHART_DATA_EXTRACT#

Represents a task for extracting chart data.

Type:

str

INFOGRAPHIC_DATA_EXTRACT#

Represents a task for extracting infographic data.

Type:

str

AUDIO_DATA_EXTRACT: str = 'audio_data_extract'#
CAPTION: str = 'caption'#
CHART_DATA_EXTRACT: str = 'chart_data_extract'#
DEDUP: str = 'dedup'#
EMBED: str = 'embed'#
EXTRACT: str = 'extract'#
FILTER: str = 'filter'#
INFOGRAPHIC_DATA_EXTRACT: str = 'infographic_data_extract'#
SPLIT: str = 'split'#
STORE: str = 'store'#
STORE_EMBEDDING: str = 'store_embedding'#
TABLE_DATA_EXTRACT: str = 'table_data_extract'#
VDB_UPLOAD: str = 'vdb_upload'#
class nv_ingest_api.internal.enums.common.TextTypeEnum(value)[source]#

Bases: str, Enum

Enum for representing different types of text segments.

BLOCK#

Represents a text block.

Type:

str

BODY#

Represents body text.

Type:

str

DOCUMENT#

Represents an entire document.

Type:

str

HEADER#

Represents header text.

Type:

str

LINE#

Represents a single line of text.

Type:

str

NEARBY_BLOCK#

Represents a block of text in close proximity to another.

Type:

str

OTHER#

Represents any other, unspecified type of text.

Type:

str

PAGE#

Represents a page of text.

Type:

str

SPAN#

Represents an inline text span.

Type:

str

BLOCK: str = 'block'#
BODY: str = 'body'#
DOCUMENT: str = 'document'#
HEADER: str = 'header'#
LINE: str = 'line'#
NEARBY_BLOCK: str = 'nearby_block'#
OTHER: str = 'other'#
PAGE: str = 'page'#
SPAN: str = 'span'#

Module contents#