nv_ingest_client.primitives package#

Subpackages#

Submodules#

nv_ingest_client.primitives.exceptions module#

Module contents#

class nv_ingest_client.primitives.BatchJobSpec(
job_specs_or_files: List[JobSpec] | List[str] | None = None,
)[source]#

Bases: object

A class used to represent a batch of job specifications (JobSpecs).

This class allows for batch processing of multiple jobs, either from a list of JobSpec instances or from file paths. It provides methods for adding job specifications, associating tasks with those specifications, and serializing the batch to a dictionary format.

_file_type_to_job_spec#

A dictionary that maps document types to a list of JobSpec instances.

Type:

defaultdict

add_job_spec(
job_spec: JobSpec,
) None[source]#

Adds a JobSpec to the batch.

Parameters:

job_spec (JobSpec) – The job specification to add.

add_task(task, document_type=None)[source]#

Adds a task to the relevant job specifications in the batch.

If a document_type is provided, the task will be added to all job specifications matching that document type. If no document_type is provided, the task will be added to all job specifications in the batch.

Parameters:
  • task (Task) – The task to add. Must derive from the nv_ingest_client.primitives.Task class.

  • document_type (str, optional) – The document type used to filter job specifications. If not provided, the document_type is inferred from the task, or the task is applied to all job specifications.

Raises:

ValueError – If the task does not derive from the Task class.

property file_types: List[str]#

Returns the list of unique file types present in the batch.

This property retrieves the document types currently stored in the batch’s job specifications.

Returns:

A list of document types for the jobs in the batch.

Return type:

List[str]

classmethod from_dataset(dataset: str, shuffle_dataset: bool = True)[source]#

Class method to create a BatchJobSpec instance from a dataset.

Parameters:
  • dataset (str) – The path to the dataset file.

  • shuffle_dataset (bool, optional) – Whether to shuffle the dataset files before adding them to the batch, by default True.

Returns:

A new instance of BatchJobSpec initialized with the dataset files.

Return type:

BatchJobSpec

from_files(files: str | List[str]) None[source]#

Initializes the batch by generating job specifications from file paths.

Parameters:

files (Union[str, List[str]]) – A single file path or a list of file paths to create job specifications from.

from_job_specs(
job_specs: JobSpec | List[JobSpec],
) None[source]#

Initializes the batch with a list of JobSpec instances.

Parameters:

job_specs (Union[JobSpec, List[JobSpec]]) – A single JobSpec or a list of JobSpec instances to add to the batch.

property job_specs: Dict[str, List[str]]#

A property that returns a dictionary of job specs categorized by document type.

Returns:

A dictionary mapping document types to job specifications.

Return type:

Dict[str, List[str]]

property tasks: Dict[str, List[Task]]#

Returns the tasks currently associated with the batch, as a dictionary whose values are lists of Task instances.

This is a property and takes no parameters. To add a task, call add_task(task, document_type=None): if a document_type is provided, the task is added to all job specifications matching that document type; if no document_type is provided, the task is added to all job specifications in the batch. The task must derive from the nv_ingest_client.primitives.Task class.

Returns:

A dictionary mapping string keys to lists of Task instances.

Return type:

Dict[str, List[Task]]

to_dict() Dict[str, List[Dict]][source]#

Serializes the batch of job specifications into a dictionary of lists of dictionaries.

Returns:

A dictionary mapping each string key to a list of dictionaries representing the job specifications in the batch.

Return type:

Dict[str, List[Dict]]

class nv_ingest_client.primitives.JobSpec(
payload: str | None = None,
tasks: List | None = None,
source_id: str | None = None,
source_name: str | None = None,
document_type: str | None = None,
extended_options: Dict | None = None,
)[source]#

Bases: object

Specification for creating a job for submission to the nv-ingest microservice.

Parameters:
  • payload (Optional[str], optional) – The payload data for the job, by default None.

  • tasks (Optional[List], optional) – A list of tasks to be added to the job, by default None.

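  • source_id (Optional[str], optional) – An identifier for the source of the job, by default None.

  • source_name (Optional[str], optional) – A name for the source of the job, by default None.

  • document_type (Optional[str], optional) – The document type of the job's payload, by default None.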

  • job_id (Optional[UUID], optional) – A unique identifier for the job, by default a new UUID is generated.

  • extended_options (Optional[Dict], optional) – Additional options for job processing, by default None.

_payload#

Storage for the payload data.

Type:

Dict

_tasks#

Storage for the list of tasks.

Type:

List

_source_id#

Storage for the source identifier.

Type:

str

_job_id#

Storage for the job’s unique identifier.

Type:

UUID

_extended_options#

Storage for the additional options.

Type:

Dict

to_dict() Dict:[source]#

Converts the job specification to a dictionary.

add_task(task):

Adds a task to the job specification.

add_task(task) None[source]#

Adds a task to the job specification.

Parameters:

task – The task to add to the job specification. Assumes the task has a to_dict method.

Raises:

ValueError – If the task does not have a to_dict method.

property document_type: str#
property job_id: UUID#
property payload: Dict#
property source_id: str#
property source_name: str#
to_dict() Dict[source]#

Converts the job specification instance into a dictionary suitable for JSON serialization.

Returns:

A dictionary representation of the job specification.

Return type:

Dict

class nv_ingest_client.primitives.Task[source]#

Bases: object

Generic task object.

to_dict() Dict[source]#

Returns a dictionary with the task specification. This dictionary is used for constructing tasks that are then submitted to the redis client.