# Source code for nv_ingest.schemas.file_source_pipe_schema
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import logging
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from pydantic import ConfigDict, BaseModel
from pydantic import Field
logger = logging.getLogger(__name__)
# [docs]
class FileSourcePipeSchema(BaseModel):
    """Pydantic schema describing the configuration of a file-source pipe.

    ``vdb_resource_name`` is the only field without a default and must be
    supplied by the caller; every other field falls back to the default
    declared below.  Keys that are not declared on this model are rejected
    because the model is configured with ``extra="forbid"``.
    """

    # Refuse undeclared keys instead of silently accepting them.
    model_config = ConfigDict(extra="forbid")

    # Batching and chunking sizes.  Units are not defined in this module
    # (presumably items per batch and tokens/characters per chunk -- TODO confirm
    # against the code that consumes this schema).
    batch_size: int = 1024
    chunk_overlap: int = 51
    chunk_size: int = 512

    # Flexible dictionary for converters metadata.
    converters_meta: Optional[Dict[Any, Any]] = {}

    enable_monitor: bool = False

    # Flexible dictionary for extractor configuration.
    extractor_config: Optional[Dict[Any, Any]] = {}

    # List of file paths.
    filenames: List[str] = Field(default_factory=list)

    # Number of threads for processing.
    num_threads: int = 1

    # Required: no default is provided.
    vdb_resource_name: str

    # Flag to watch file changes.
    watch: bool = False

    # Interval to watch file changes.
    # NOTE(review): the default is negative, which is unusual for an interval;
    # confirm with the consumer whether this is a sentinel or should be 5.0.
    watch_interval: float = -5.0