class ExternalInputIterator(object):
    """Endless batch source that reads raw image files listed in ``file_list.txt``.

    Each non-blank line of ``<images_dir>file_list.txt`` is expected to have the
    form ``"<filename> <label>"`` (exactly one space). The list is shuffled once,
    at construction time; iteration then walks it cyclically and never raises
    ``StopIteration``.

    Args:
        batch_size: Number of samples returned by every ``__next__`` call.
        images_dir: Directory prefix that holds ``file_list.txt`` and the files
            it names. It is concatenated directly with the file names, so it
            must end with a path separator.
    """

    def __init__(self, batch_size, images_dir="../../data/images/"):
        self.images_dir = images_dir
        self.batch_size = batch_size
        with open(self.images_dir + "file_list.txt", "r") as f:
            # Lines read from a file keep their trailing newline, so a blank
            # line is "\n", never "". Test the stripped text to really skip it.
            self.files = [line.rstrip() for line in f if line.strip()]
        shuffle(self.files)
        # Initialise the cursor here as well, so that next() works even when
        # the caller did not go through iter() first.
        self.i = 0
        self.n = len(self.files)

    def __iter__(self):
        """Rewind to the first entry of the (already shuffled) list and return self."""
        self.i = 0
        self.n = len(self.files)
        return self

    def __next__(self):
        """Return the next ``(batch, labels)`` pair.

        Returns:
            A tuple of two lists of length ``batch_size``: ``batch`` holds 1-D
            ``uint8`` arrays with the raw bytes of each file, ``labels`` holds
            one-element ``uint8`` arrays with the label parsed from the list.

        Raises:
            IndexError: If the file list is empty.
        """
        if self.n == 0:
            raise IndexError("file list is empty: no samples to read")

        batch = []
        labels = []
        for _ in range(self.batch_size):
            jpeg_filename, label = self.files[self.i].split(" ")
            # Context manager closes the handle immediately instead of leaking
            # one open file per sample.
            with open(self.images_dir + jpeg_filename, "rb") as f:
                batch.append(np.frombuffer(f.read(), dtype=np.uint8))
            labels.append(np.array([label], dtype=np.uint8))
            # Wrap around so the iterator never runs out of data.
            self.i = (self.i + 1) % self.n
        return (batch, labels)