Using DALI in PaddlePaddle
Overview
This example shows how to use DALI in PaddlePaddle.
This example uses the Caffe LMDB reader (`fn.readers.caffe`). See the other examples for details on how to use different data formats.
Let us start by defining some global constants.
The `DALI_EXTRA_PATH` environment variable should point to the location where the data from the DALI extra repository has been downloaded. Please make sure that the proper release tag is checked out.
[1]:
import os.path
# Root of the downloaded DALI extra data; a KeyError is raised here when the
# DALI_EXTRA_PATH environment variable is not set.
test_data_root = os.environ["DALI_EXTRA_PATH"]

# Caffe LMDB database located at <test_data_root>/db/lmdb.
lmdb_folder = os.path.join(os.path.join(test_data_root, "db"), "lmdb")

N = 8  # number of GPUs
BATCH_SIZE = 128  # batch size per GPU
# NOTE(review): IMAGE_SIZE is not referenced by the cells shown here — confirm
# whether it is still needed.
IMAGE_SIZE = 3
Let us define a pipeline with a reader:
[2]:
from nvidia.dali import pipeline_def, Pipeline
import nvidia.dali.fn as fn
import nvidia.dali.types as types
@pipeline_def
def caffe_pipeline(num_gpus):
    """Build a DALI graph that reads, decodes and augments Caffe LMDB samples.

    The reader is sharded across ``num_gpus`` shards and this pipeline reads
    the shard whose index equals its own ``device_id``. Decoded images are
    resized so that the shorter side is a random length drawn from
    ``(256, 480)``, cropped to 227x227 at a random position, and normalized
    with mean 128 and std 1 into ``FLOAT`` output.

    Args:
        num_gpus: Total number of shards the LMDB dataset is split into.

    Returns:
        A ``(images, labels)`` pair of DALI data nodes.
    """
    # The shard index is taken from the pipeline currently being defined.
    shard_index = Pipeline.current().device_id

    encoded, labels = fn.readers.caffe(
        name="Reader",
        path=lmdb_folder,
        random_shuffle=True,
        shard_id=shard_index,
        num_shards=num_gpus,
    )

    decoded = fn.decoders.image(encoded, device="mixed")

    # Random target length for the shorter image side.
    shorter_side = fn.random.uniform(range=(256, 480))
    resized = fn.resize(
        decoded,
        resize_shorter=shorter_side,
        interp_type=types.INTERP_LINEAR,
    )

    # Normalized (0..1) random anchor of the crop window along each axis.
    crop_x = fn.random.uniform(range=(0.0, 1.0))
    crop_y = fn.random.uniform(range=(0.0, 1.0))
    normalized = fn.crop_mirror_normalize(
        resized,
        crop_pos_x=crop_x,
        crop_pos_y=crop_y,
        dtype=types.FLOAT,
        crop=(227, 227),
        mean=[128.0, 128.0, 128.0],
        std=[1.0, 1.0, 1.0],
    )
    return normalized, labels
Let us create the pipelines and pass them to the PaddlePaddle generic iterator.
[3]:
import numpy as np
from nvidia.dali.plugin.paddle import DALIGenericIterator
# Inclusive bounds that every label produced by the reader must satisfy.
label_range = (0, 999)

# One pipeline per GPU; inside caffe_pipeline the device id also selects the
# reader shard.
pipes = [
    caffe_pipeline(
        batch_size=BATCH_SIZE,
        num_threads=2,
        device_id=gpu_id,
        num_gpus=N,
    )
    for gpu_id in range(N)
]
for pipe in pipes:
    pipe.build()

# "Reader" is the name given to the reader operator inside caffe_pipeline.
dali_iter = DALIGenericIterator(
    pipes,
    ["data", "label"],
    reader_name="Reader",
)

# Testing correctness of labels: each iteration yields one output dict per
# pipeline.
for i, data in enumerate(dali_iter):
    for d in data:
        label = d["label"]
        image = d["data"]
        # Labels need to be integers (no fractional part).
        assert np.equal(np.mod(label, 1), 0).all()
        # Labels need to lie within label_range, both ends inclusive.
        assert (np.array(label) >= label_range[0]).all()
        assert (np.array(label) <= label_range[1]).all()

print("OK")
OK
[ ]: