Source code for nv_ingest_api.internal.schemas.transform.transform_text_embedding_schema

# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import logging

from pydantic import ConfigDict, BaseModel, Field, model_validator, field_validator

from typing import Optional

from nv_ingest_api.util.logging.configuration import LogLevel

logger = logging.getLogger(__name__)


class TextEmbeddingSchema(BaseModel):
    """Configuration schema for the text-embedding transform.

    Every field has a default, so the model can be built with no arguments.
    Unknown keys are rejected (``extra="forbid"``). An ``api_key`` of ``None``
    is normalised to the empty string before validation, so callers may pass
    ``None`` (or omit the key) without tripping the ``str`` type check.

    Attributes
    ----------
    api_key
        API key string; defaults to ``""`` and is excluded from ``repr()``
        output (``repr=False``).
    batch_size
        Integer batch size; defaults to ``4``.
    embedding_model
        Model identifier string.
    embedding_nim_endpoint
        Endpoint URL string.
    encoding_format, input_type, truncate
        Plain string options with the defaults shown below.
        NOTE(review): presumably forwarded to the embedding service as-is;
        the accepted values are not constrained by this schema.
    httpx_log_level
        ``LogLevel`` member; defaults to ``LogLevel.WARNING``.
    raise_on_failure
        Boolean flag; defaults to ``False``.
    text_elements_modality, image_elements_modality,
    structured_elements_modality, audio_elements_modality
        Per-element-type modality strings; each defaults to ``"text"``.
    custom_content_field, result_target_field, dimensions
        Optional overrides; each defaults to ``None``.
    """

    api_key: str = Field(default="", repr=False)
    batch_size: int = Field(default=4)
    embedding_model: str = Field(default="nvidia/llama-3.2-nv-embedqa-1b-v2")
    embedding_nim_endpoint: str = Field(default="http://embedding:8000/v1")
    encoding_format: str = Field(default="float")
    httpx_log_level: LogLevel = Field(default=LogLevel.WARNING)
    input_type: str = Field(default="passage")
    raise_on_failure: bool = Field(default=False)
    truncate: str = Field(default="END")
    text_elements_modality: str = Field(default="text")
    image_elements_modality: str = Field(default="text")
    structured_elements_modality: str = Field(default="text")
    audio_elements_modality: str = Field(default="text")
    custom_content_field: Optional[str] = None
    result_target_field: Optional[str] = None
    dimensions: Optional[int] = None

    # Reject any key that is not declared above.
    model_config = ConfigDict(extra="forbid")

    @field_validator("api_key", mode="before")
    @classmethod
    def _coerce_api_key_none(cls, v):
        """Replace an ``api_key`` of ``None`` with ``""``; pass other values through.

        This also covers inputs that are not plain dicts, which the
        model-level validator below leaves untouched.
        """
        return "" if v is None else v

    @model_validator(mode="before")
    @classmethod
    def _coerce_none_to_empty(cls, values):
        """Convert api_key=None to empty string so validation passes when key is omitted.

        Only dict input is inspected; anything else is returned unchanged.
        When a substitution is needed a new dict is returned, so the mapping
        supplied by the caller is never modified.
        """
        if isinstance(values, dict) and values.get("api_key") is None:
            # Shallow-copy with the override instead of assigning into
            # ``values``: the raw input may be the caller's own dict
            # (e.g. via ``model_validate(d)``), and validation should not
            # add or rewrite keys in it as a side effect.
            return {**values, "api_key": ""}
        return values