# Source code for nv_ingest_api.util.converters.type_mappings

# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from nv_ingest_api.internal.schemas.meta.ingest_job_schema import DocumentTypeEnum
from nv_ingest_api.internal.enums.common import ContentTypeEnum

# Lookup table from a document's file type (DocumentTypeEnum) to the content
# category (ContentTypeEnum) assigned to it. As listed below:
#   - BMP, JPEG, PNG, SVG, TIFF -> IMAGE
#   - DOCX, PDF, PPTX           -> STRUCTURED
#   - HTML, TXT                 -> TEXT
#   - MP3, WAV                  -> AUDIO
# Only the document types listed here have an entry; subscripting the dict
# with any other key raises KeyError.
DOC_TO_CONTENT_MAP = {
    DocumentTypeEnum.BMP: ContentTypeEnum.IMAGE,
    DocumentTypeEnum.DOCX: ContentTypeEnum.STRUCTURED,
    DocumentTypeEnum.HTML: ContentTypeEnum.TEXT,
    DocumentTypeEnum.JPEG: ContentTypeEnum.IMAGE,
    DocumentTypeEnum.MP3: ContentTypeEnum.AUDIO,
    DocumentTypeEnum.PDF: ContentTypeEnum.STRUCTURED,
    DocumentTypeEnum.PNG: ContentTypeEnum.IMAGE,
    DocumentTypeEnum.PPTX: ContentTypeEnum.STRUCTURED,
    DocumentTypeEnum.SVG: ContentTypeEnum.IMAGE,
    DocumentTypeEnum.TIFF: ContentTypeEnum.IMAGE,
    DocumentTypeEnum.TXT: ContentTypeEnum.TEXT,
    DocumentTypeEnum.WAV: ContentTypeEnum.AUDIO,
}


def doc_type_to_content_type(doc_type: DocumentTypeEnum) -> ContentTypeEnum:
    """
    Convert a DocumentTypeEnum to its corresponding ContentTypeEnum.

    The conversion is a direct lookup in the module-level
    ``DOC_TO_CONTENT_MAP`` table.

    Parameters
    ----------
    doc_type : DocumentTypeEnum
        The document type to convert.

    Returns
    -------
    ContentTypeEnum
        The content type that ``DOC_TO_CONTENT_MAP`` associates with
        ``doc_type``.

    Raises
    ------
    KeyError
        If ``doc_type`` has no entry in ``DOC_TO_CONTENT_MAP``.
    """
    # Plain subscript (not .get) so an unmapped type fails loudly with KeyError
    # instead of silently returning None.
    return DOC_TO_CONTENT_MAP[doc_type]