# Source code for nv_ingest.framework.schemas.framework_ingest_config_schema
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import logging
from pydantic import ConfigDict, BaseModel
from nv_ingest.framework.schemas.framework_job_counter_schema import JobCounterSchema
from nv_ingest.framework.schemas.framework_message_broker_sink_schema import MessageBrokerTaskSinkSchema
from nv_ingest.framework.schemas.framework_message_broker_source_schema import MessageBrokerTaskSourceSchema
from nv_ingest.framework.schemas.framework_metadata_injector_schema import MetadataInjectorSchema
from nv_ingest.framework.schemas.framework_otel_meter_schema import OpenTelemetryMeterSchema
from nv_ingest.framework.schemas.framework_otel_tracer_schema import OpenTelemetryTracerSchema
from nv_ingest.framework.schemas.framework_vdb_task_sink_schema import VdbTaskSinkSchema
from nv_ingest_api.internal.schemas.extract.extract_audio_schema import AudioExtractorSchema
from nv_ingest_api.internal.schemas.extract.extract_chart_schema import ChartExtractorSchema
from nv_ingest_api.internal.schemas.extract.extract_infographic_schema import InfographicExtractorSchema
from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFExtractorSchema
from nv_ingest_api.internal.schemas.extract.extract_pptx_schema import PPTXExtractorSchema
from nv_ingest_api.internal.schemas.extract.extract_table_schema import TableExtractorSchema
from nv_ingest_api.internal.schemas.mutate.mutate_image_dedup_schema import ImageDedupSchema
from nv_ingest_api.internal.schemas.store.store_embedding_schema import EmbeddingStorageSchema
from nv_ingest_api.internal.schemas.store.store_image_schema import ImageStorageModuleSchema
from nv_ingest_api.internal.schemas.transform.transform_image_caption_schema import ImageCaptionExtractionSchema
from nv_ingest_api.internal.schemas.transform.transform_image_filter_schema import ImageFilterSchema
from nv_ingest_api.internal.schemas.transform.transform_text_embedding_schema import TextEmbeddingSchema
from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
# [docs]
class PipelineConfigSchema(BaseModel):
    """Aggregate configuration model bundling the per-component schemas.

    Every field is typed with one of the imported component schemas and
    defaults to an instance of that schema created with no arguments. The
    default expressions are evaluated when this class body executes.

    The model is configured with ``extra="forbid"``, so the field names
    declared below are the only keys accepted at construction time.
    """

    # Declared first so the strictness of the model is visible up front.
    model_config = ConfigDict(extra="forbid")

    # NOTE: this field name ends in ``_schema`` while most others end in
    # ``_module``. It is kept as-is because, with extras forbidden, renaming
    # it would change which input keys are accepted.
    audio_extractor_schema: AudioExtractorSchema = AudioExtractorSchema()

    chart_extractor_module: ChartExtractorSchema = ChartExtractorSchema()
    text_splitter_module: TextSplitterSchema = TextSplitterSchema()
    embedding_storage_module: EmbeddingStorageSchema = EmbeddingStorageSchema()
    embed_extractions_module: TextEmbeddingSchema = TextEmbeddingSchema()
    image_caption_extraction_module: ImageCaptionExtractionSchema = ImageCaptionExtractionSchema()
    image_dedup_module: ImageDedupSchema = ImageDedupSchema()
    image_filter_module: ImageFilterSchema = ImageFilterSchema()
    image_storage_module: ImageStorageModuleSchema = ImageStorageModuleSchema()
    infographic_extractor_module: InfographicExtractorSchema = InfographicExtractorSchema()
    job_counter_module: JobCounterSchema = JobCounterSchema()
    metadata_injection_module: MetadataInjectorSchema = MetadataInjectorSchema()
    otel_meter_module: OpenTelemetryMeterSchema = OpenTelemetryMeterSchema()
    otel_tracer_module: OpenTelemetryTracerSchema = OpenTelemetryTracerSchema()
    pdf_extractor_module: PDFExtractorSchema = PDFExtractorSchema()
    pptx_extractor_module: PPTXExtractorSchema = PPTXExtractorSchema()

    # The two ``redis_*`` fields are typed with the generic message-broker
    # sink/source schemas.
    redis_task_sink: MessageBrokerTaskSinkSchema = MessageBrokerTaskSinkSchema()
    redis_task_source: MessageBrokerTaskSourceSchema = MessageBrokerTaskSourceSchema()

    table_extractor_module: TableExtractorSchema = TableExtractorSchema()
    vdb_task_sink: VdbTaskSinkSchema = VdbTaskSinkSchema()