# Source code for nv_ingest.util.converters.type_mappings

# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0


from nv_ingest.schemas.ingest_job_schema import DocumentTypeEnum
from nv_ingest.schemas.metadata_schema import ContentTypeEnum

# Lookup table pairing each document type with the content type it is
# treated as. Entries are listed as (document type, content type) pairs and
# kept in the same order as before so iteration order is unchanged.
DOC_TO_CONTENT_MAP = dict(
    (
        (DocumentTypeEnum.bmp, ContentTypeEnum.IMAGE),
        (DocumentTypeEnum.docx, ContentTypeEnum.STRUCTURED),
        (DocumentTypeEnum.html, ContentTypeEnum.STRUCTURED),
        (DocumentTypeEnum.jpeg, ContentTypeEnum.IMAGE),
        (DocumentTypeEnum.mp3, ContentTypeEnum.AUDIO),
        (DocumentTypeEnum.pdf, ContentTypeEnum.STRUCTURED),
        (DocumentTypeEnum.png, ContentTypeEnum.IMAGE),
        (DocumentTypeEnum.pptx, ContentTypeEnum.STRUCTURED),
        (DocumentTypeEnum.svg, ContentTypeEnum.IMAGE),
        (DocumentTypeEnum.tiff, ContentTypeEnum.IMAGE),
        (DocumentTypeEnum.txt, ContentTypeEnum.TEXT),
        (DocumentTypeEnum.wav, ContentTypeEnum.AUDIO),
    )
)


def doc_type_to_content_type(doc_type: DocumentTypeEnum) -> ContentTypeEnum:
    """
    Convert DocumentTypeEnum to ContentTypeEnum.

    Parameters
    ----------
    doc_type : DocumentTypeEnum
        The document type to look up in ``DOC_TO_CONTENT_MAP``.

    Returns
    -------
    ContentTypeEnum
        The content type stored in ``DOC_TO_CONTENT_MAP`` for ``doc_type``.

    Raises
    ------
    KeyError
        If ``doc_type`` has no entry in ``DOC_TO_CONTENT_MAP``.
    """
    # Plain subscript (not .get) so an unmapped document type fails loudly
    # with KeyError instead of silently returning None.
    return DOC_TO_CONTENT_MAP[doc_type]