Source code for nv_ingest_api.util.exception_handlers.pdf

# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import functools
import logging

from nv_ingest_api.internal.enums.common import StatusEnum, TaskTypeEnum
from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadata

logger = logging.getLogger(__name__)


def pdfium_exception_handler(descriptor):
    """
    Build a decorator that converts any exception from the wrapped function into ``[]``.

    The returned decorator wraps a function so that, if the function raises
    any ``Exception``, a warning containing ``descriptor``, the function name
    and the exception text is logged and an empty list is returned instead.
    When no exception occurs, the wrapped function's own return value is
    passed through unchanged.

    Parameters
    ----------
    descriptor : str
        A label identifying the context of the wrapped function. It is
        included at the start of the warning message when an exception is
        caught.

    Returns
    -------
    callable
        A decorator that wraps the target function with the exception
        handling described above. The wrapper preserves the target's
        metadata (``__name__``, ``__doc__``, ``__wrapped__``, ...).

    Notes
    -----
    Only subclasses of ``Exception`` are caught; ``KeyboardInterrupt`` and
    ``SystemExit`` still propagate. Nothing is re-raised for caught
    exceptions, so callers cannot distinguish a failure from a legitimately
    empty result other than through the log.

    Examples
    --------
    >>> @pdfium_exception_handler("PDF Processing")
    ... def process_pdf(file_path):
    ...     raise ValueError("corrupt file")
    ...
    >>> process_pdf("example.pdf")
    []
    >>> process_pdf.__name__
    'process_pdf'
    """

    def outer_function(func):
        # Without functools.wraps every decorated function would report itself
        # as "inner_function" and lose its docstring.
        @functools.wraps(func)
        def inner_function(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                # Deliberate best-effort: log and fall back to an empty list.
                log_error_message = f"{descriptor}:{func.__name__} error:{e}"
                logger.warning(log_error_message)
                return []

        return inner_function

    return outer_function
def create_exception_tag(error_message, source_id=None):
    """
    Build a validated error-metadata entry describing a failed extraction.

    An ``error_metadata`` record is assembled with task
    ``TaskTypeEnum.EXTRACT``, status ``StatusEnum.ERROR``, the given
    ``source_id`` and the given ``error_message`` (stored under
    ``"error_msg"``). The record is placed under the ``"error_metadata"``
    key of an otherwise empty metadata dict, passed through
    ``validate_metadata``, and the validated object's ``model_dump()``
    output is returned.

    Parameters
    ----------
    error_message : str
        The error message describing the exception.
    source_id : Optional[str], default=None
        The identifier for the source related to the error, if available.

    Returns
    -------
    list
        A one-element list whose single entry is the two-element list
        ``[None, dumped_metadata]``, where ``dumped_metadata`` is whatever
        ``model_dump()`` returns for the validated metadata.

    Notes
    -----
    Any exception raised by ``validate_metadata`` propagates to the caller
    unchanged. The exact shape of the dumped metadata is determined by the
    metadata schema, which is defined outside this module.
    """
    payload = {
        "error_metadata": {
            "task": TaskTypeEnum.EXTRACT,
            "status": StatusEnum.ERROR,
            "source_id": source_id,
            "error_msg": error_message,
        },
    }
    validated = validate_metadata(payload)
    dumped = validated.model_dump()
    return [[None, dumped]]