# Source code for nv_ingest_api.util.exception_handlers.detectors

# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import functools
import logging
from typing import Any
from typing import Callable
from typing import Dict

from langdetect.lang_detect_exception import LangDetectException

from nv_ingest_api.internal.enums.common import LanguageEnum

logger = logging.getLogger(__name__)


def langdetect_exception_handler(func: Callable, **kwargs: Any) -> Callable:
    """
    Decorate a language-detection function so `LangDetectException` is handled.

    The returned wrapper calls `func` with the arguments it receives. If `func`
    raises `LangDetectException`, the wrapper logs a warning and returns
    `LanguageEnum.UNKNOWN` instead of propagating the exception. Any other
    exception raised by `func` is not caught here and propagates to the caller.

    Parameters
    ----------
    func : callable
        The function to be decorated. This function is expected to handle
        language detection.
    kwargs : dict
        Accepted for backward compatibility only. These decorator-level keyword
        arguments are not used and are NOT forwarded to `func`; only the
        arguments given to the wrapped function at call time are passed through.

    Returns
    -------
    callable
        A wrapper around `func` that preserves its metadata (`__name__`,
        `__doc__`, ...) and returns either the result of `func` or
        `LanguageEnum.UNKNOWN` when a `LangDetectException` was raised.

    Examples
    --------
    >>> @langdetect_exception_handler
    ... def detect_language(text):
    ...     # Function implementation here
    ...     ...
    ...
    >>> detect_language.__name__
    'detect_language'

    If the wrapped function raises `LangDetectException`, the call returns
    `LanguageEnum.UNKNOWN` and a warning is logged.
    """

    # functools.wraps keeps the decorated function's name, docstring and
    # __wrapped__ reference so introspection, logging and docs still see the
    # original function rather than the generic wrapper.
    @functools.wraps(func)
    def inner_function(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except LangDetectException as e:
            # Best-effort behavior: detection failures are downgraded to a
            # warning and reported as an unknown language.
            logger.warning("LangDetectException: %s", e)
            return LanguageEnum.UNKNOWN

    return inner_function