COCO Reader

Reader operator that reads a COCO dataset (or subset of COCO), which consists of an annotation file and the images directory.

The DALI_EXTRA_PATH environment variable should point to the location where the data from the DALI extra repository has been downloaded. Please make sure that the proper release tag is checked out.

from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types
import numpy as np
from time import time
import os.path

# Root of the DALI extra checkout; a KeyError is raised if the variable is unset.
test_data_root = os.environ['DALI_EXTRA_PATH']

# Both the images directory and the annotation file live under db/coco.
_coco_dir = os.path.join(test_data_root, 'db', 'coco')
file_root = os.path.join(_coco_dir, 'images')
annotations_file = os.path.join(_coco_dir, 'instances.json')

# One pipeline is created per GPU; every pipeline processes batch_size samples per run.
num_gpus, batch_size = 1, 16
class COCOPipeline(Pipeline):
    """Pipeline that reads COCO samples and decodes their images.

    The reader takes its data from the module-level ``file_root`` and
    ``annotations_file`` and is sharded across ``num_gpus`` shards, with
    ``device_id`` selecting the shard handled by this pipeline.
    """

    def __init__(self, batch_size, num_threads, device_id):
        """Create the reader and decoder operators.

        Args:
            batch_size: Forwarded to ``Pipeline.__init__``.
            num_threads: Forwarded to ``Pipeline.__init__``.
            device_id: Forwarded to ``Pipeline.__init__`` and also used as
                the reader's ``shard_id``.
        """
        # The seed is fixed to 15 for every instance.
        super(COCOPipeline, self).__init__(
            batch_size, num_threads, device_id, seed=15
        )
        # NOTE(review): ratio=True presumably yields box coordinates relative
        # to the image size (the plotting code scales them by W and H) -
        # confirm against the COCOReader documentation.
        self.input = ops.COCOReader(
            file_root=file_root,
            annotations_file=annotations_file,
            shard_id=device_id,
            num_shards=num_gpus,
            ratio=True,
        )
        self.decode = ops.ImageDecoder(device="mixed", output_type=types.RGB)

    def define_graph(self):
        """Wire reader -> decoder and return ``(images, bboxes, labels)``."""
        encoded, bboxes, labels = self.input()
        return (self.decode(encoded), bboxes, labels)
# Measure how long it takes to construct and build one pipeline per GPU.
start = time()
pipes = [
    COCOPipeline(batch_size=batch_size, num_threads=2, device_id=device_id)
    for device_id in range(num_gpus)
]
for pipe in pipes:
    # Each pipeline has to be built before it can be run.
    pipe.build()
total_time = time() - start
print("Computation graph built and dataset loaded in %f seconds." % total_time)
Computation graph built and dataset loaded in 0.307431 seconds.
# Run every pipeline once; each result holds the (images, bboxes, labels)
# outputs returned by define_graph, in that order.
pipe_out = [pipe.run() for pipe in pipes]

# Only the first pipeline's batch is inspected below. The images are copied
# to the host with as_cpu() before individual samples are accessed.
images_cpu = pipe_out[0][0].as_cpu()
bboxes_cpu = pipe_out[0][1]
labels_cpu = pipe_out[0][2]

Bounding boxes returned by the operator are lists of floats composed of [x, y, width, height] (ltrb is set to False by default).

# Boxes of a single sample from the batch (index 4, the same sample that is
# plotted further down).
bboxes = bboxes_cpu.at(4)
array([[0.125     , 0.1794569 , 0.32265624, 0.4687131 ]], dtype=float32)

Let’s see the ground truth bounding boxes drawn on the image.

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random

# Index, within the batch, of the sample to visualize.
img_index = 4

img = images_cpu.at(img_index)

# Image size in pixels, used to scale the boxes when drawing.
# Assumes the decoded image is laid out as (height, width, channels).
H = img.shape[0]
W = img.shape[1]

fig, ax = plt.subplots(1)
ax.imshow(img)

bboxes = bboxes_cpu.at(img_index)
labels = labels_cpu.at(img_index)

# Distinct category ids present in this sample; each label entry is indexed
# with [0] to obtain the id, matching how labels are used when drawing.
categories_set = {label[0] for label in labels}

# One random RGB color per category so boxes of the same class share a color.
category_id_to_color = {
    cat_id: [random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)]
    for cat_id in categories_set
}

# Build one outline rectangle per ground-truth box. Box values are multiplied
# by the image width/height to obtain pixel coordinates, and the outline color
# is looked up by the box's category id.
for bbox, label in zip(bboxes, labels):
    rect = patches.Rectangle(
        xy=(bbox[0]*W, bbox[1]*H),
        width=bbox[2]*W,
        height=bbox[3]*H,
        linewidth=1,
        edgecolor=category_id_to_color[label[0]],
        facecolor='none',
    )