# Source code for nv_ingest_api.util.exception_handlers.pdf
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import functools
import logging

from nv_ingest_api.internal.enums.common import StatusEnum, TaskTypeEnum
from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadata
logger = logging.getLogger(__name__)
[docs]
def pdfium_exception_handler(descriptor):
    """
    Build a decorator that turns any exception from the wrapped function into an empty list.

    The returned decorator wraps a function so that, when the call raises any
    ``Exception``, a warning containing ``descriptor``, the wrapped function's
    name and the exception text is logged, and ``[]`` is returned instead of
    propagating the error. When the call succeeds, its return value is passed
    through unchanged.

    Parameters
    ----------
    descriptor : str
        A string identifying the context or source of the wrapped function.
        It is included in the warning logged when an exception occurs.

    Returns
    -------
    callable
        A decorator. The function it produces keeps the wrapped function's
        metadata (``__name__``, ``__doc__``, ``__wrapped__``).

    Notes
    -----
    Only ``Exception`` subclasses are caught; ``BaseException`` subclasses such
    as ``KeyboardInterrupt`` and ``SystemExit`` still propagate.

    Examples
    --------
    >>> @pdfium_exception_handler("PDF Processing")
    ... def process_pdf(file_path):
    ...     raise RuntimeError("corrupt file")
    ...
    >>> process_pdf("example.pdf")
    []
    """

    def outer_function(func):
        # Preserve the wrapped function's identity so logs, tracebacks and
        # introspection do not all report "inner_function".
        @functools.wraps(func)
        def inner_function(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                # Deliberate best-effort: report the failure and fall back to
                # an empty result rather than propagating the error.
                logger.warning("%s:%s error:%s", descriptor, func.__name__, e)
                return []

        return inner_function

    return outer_function
[docs]
def create_exception_tag(error_message, source_id=None):
    """
    Build a validated metadata entry describing an extraction error.

    An ``error_metadata`` record is assembled with the task set to
    ``TaskTypeEnum.EXTRACT``, the status set to ``StatusEnum.ERROR``, and the
    supplied source identifier and error message. The record is placed under
    the ``"error_metadata"`` key of an otherwise empty metadata dictionary,
    passed through ``validate_metadata``, and the validated result is dumped
    with ``model_dump()``.

    Parameters
    ----------
    error_message : str
        The error message describing the exception.
    source_id : Optional[str], default=None
        The identifier for the source related to the error, if available.

    Returns
    -------
    list
        A list holding one two-element list: ``None`` followed by the dumped,
        validated metadata.

    Notes
    -----
    Any error raised by ``validate_metadata`` propagates to the caller.
    The exact contents of the dumped metadata depend on the metadata schema,
    which is defined outside this module.

    Examples
    --------
    >>> tag = create_exception_tag("File not found", source_id="12345")
    >>> len(tag), tag[0][0] is None
    (1, True)
    """
    payload = {
        "error_metadata": {
            "task": TaskTypeEnum.EXTRACT,
            "status": StatusEnum.ERROR,
            "source_id": source_id,
            "error_msg": error_message,
        }
    }
    validated = validate_metadata(payload)
    entry = [None, validated.model_dump()]
    return [entry]