Source code for nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema

# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from pydantic import Field, BaseModel, field_validator, ConfigDict

from typing import Optional


class TextSplitterSchema(BaseModel):
    """Validated configuration for the text-splitter transform.

    Unknown fields are rejected (``extra="forbid"``).

    Attributes
    ----------
    tokenizer : Optional[str]
        Optional tokenizer name; defaults to ``None``. How the name (or its
        absence) is interpreted is up to the consumer of this schema and is
        not visible from this module.
    chunk_size : int
        Size of each chunk; must be strictly positive. Defaults to 1024.
        NOTE(review): the unit is presumably tokens -- confirm with the
        splitter implementation.
    chunk_overlap : int
        Overlap between chunks; must be non-negative and strictly less than
        ``chunk_size``. Defaults to 150.
    raise_on_failure : bool
        Defaults to ``False``. NOTE(review): presumably controls whether the
        consuming stage raises on error -- confirm with the caller.
    """

    tokenizer: Optional[str] = None
    chunk_size: int = Field(default=1024, gt=0)
    # validate_default=True is required so that the overlap-vs-size check in
    # check_chunk_overlap also runs when the caller omits chunk_overlap.
    # Without it, pydantic skips field validators for default values and
    # e.g. TextSplitterSchema(chunk_size=100) would silently keep an overlap
    # of 150, which is larger than the chunk itself.
    chunk_overlap: int = Field(default=150, ge=0, validate_default=True)
    raise_on_failure: bool = False

    @field_validator("chunk_overlap")
    @classmethod
    def check_chunk_overlap(cls, v, values):
        """Ensure ``chunk_overlap`` is strictly smaller than ``chunk_size``.

        Parameters
        ----------
        v : int
            The candidate ``chunk_overlap`` value.
        values
            Pydantic validation context; ``values.data`` holds the fields
            that have already been validated successfully.

        Returns
        -------
        int
            ``v`` unchanged when the constraint holds.

        Raises
        ------
        ValueError
            If ``chunk_size`` is available and ``v >= chunk_size``.
        """
        # chunk_size is declared before chunk_overlap, so it is validated
        # first. It is missing from ``values.data`` when its own validation
        # failed; in that case the comparison is skipped.
        chunk_size = values.data.get("chunk_size")
        if chunk_size is not None and v >= chunk_size:
            raise ValueError("chunk_overlap must be less than chunk_size")
        return v

    model_config = ConfigDict(extra="forbid")