Source code for nv_ingest.schemas.ingest_pipeline_config_schema

# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import logging

from pydantic import ConfigDict, BaseModel

from nv_ingest.schemas.audio_extractor_schema import AudioExtractorSchema
from nv_ingest.schemas.chart_extractor_schema import ChartExtractorSchema
from nv_ingest.schemas.embedding_storage_schema import EmbeddingStorageModuleSchema
from nv_ingest.schemas.embed_extractions_schema import EmbedExtractionsSchema
from nv_ingest.schemas.image_caption_extraction_schema import ImageCaptionExtractionSchema
from nv_ingest.schemas.image_dedup_schema import ImageDedupSchema
from nv_ingest.schemas.image_filter_schema import ImageFilterSchema
from nv_ingest.schemas.image_storage_schema import ImageStorageModuleSchema
from nv_ingest.schemas.infographic_extractor_schema import InfographicExtractorSchema
from nv_ingest.schemas.vdb_task_sink_schema import VdbTaskSinkSchema
from nv_ingest.schemas.job_counter_schema import JobCounterSchema
from nv_ingest.schemas.message_broker_sink_schema import MessageBrokerTaskSinkSchema
from nv_ingest.schemas.message_broker_source_schema import MessageBrokerTaskSourceSchema
from nv_ingest.schemas.metadata_injector_schema import MetadataInjectorSchema
from nv_ingest.schemas.text_splitter_schema import TextSplitterSchema
from nv_ingest.schemas.otel_meter_schema import OpenTelemetryMeterSchema
from nv_ingest.schemas.otel_tracer_schema import OpenTelemetryTracerSchema
from nv_ingest.schemas.pdf_extractor_schema import PDFExtractorSchema
from nv_ingest.schemas.pptx_extractor_schema import PPTXExtractorSchema
from nv_ingest.schemas.table_extractor_schema import TableExtractorSchema

logger = logging.getLogger(__name__)


[docs] class PipelineConfigSchema(BaseModel): audio_extractor_schema: AudioExtractorSchema = AudioExtractorSchema() chart_extractor_module: ChartExtractorSchema = ChartExtractorSchema() text_splitter_module: TextSplitterSchema = TextSplitterSchema() embedding_storage_module: EmbeddingStorageModuleSchema = EmbeddingStorageModuleSchema() embed_extractions_module: EmbedExtractionsSchema = EmbedExtractionsSchema() image_caption_extraction_module: ImageCaptionExtractionSchema = ImageCaptionExtractionSchema() image_dedup_module: ImageDedupSchema = ImageDedupSchema() image_filter_module: ImageFilterSchema = ImageFilterSchema() image_storage_module: ImageStorageModuleSchema = ImageStorageModuleSchema() infographic_extractor_module: InfographicExtractorSchema = InfographicExtractorSchema() job_counter_module: JobCounterSchema = JobCounterSchema() metadata_injection_module: MetadataInjectorSchema = MetadataInjectorSchema() otel_meter_module: OpenTelemetryMeterSchema = OpenTelemetryMeterSchema() otel_tracer_module: OpenTelemetryTracerSchema = OpenTelemetryTracerSchema() pdf_extractor_module: PDFExtractorSchema = PDFExtractorSchema() pptx_extractor_module: PPTXExtractorSchema = PPTXExtractorSchema() redis_task_sink: MessageBrokerTaskSinkSchema = MessageBrokerTaskSinkSchema() redis_task_source: MessageBrokerTaskSourceSchema = MessageBrokerTaskSourceSchema() table_extractor_module: TableExtractorSchema = TableExtractorSchema() vdb_task_sink: VdbTaskSinkSchema = VdbTaskSinkSchema() model_config = ConfigDict(extra="forbid")