nvImageCodec examples#

[1]:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt

def show_image(img, title=None, figsize=(5, 5), cmap=None):
    """Draw ``img`` on a new, small matplotlib figure with the axes hidden.

    Args:
        img: Image data handed straight to ``plt.imshow``.
        title: Optional caption; skipped when falsy (``None`` or empty string).
        figsize: Figure size in inches, forwarded to ``plt.figure``.
        cmap: Optional colormap forwarded to ``plt.imshow``.

    The figure is created at 72 dpi to keep the saved notebook small.
    """
    plt.figure(dpi=72, figsize=figsize)
    plt.imshow(img, cmap=cmap)
    plt.axis('off')
    if title:
        plt.title(title)
    plt.tight_layout()

Setting resource folder

[2]:
# Directory holding the sample images; override it with PYNVIMGCODEC_EXAMPLES_RESOURCES_DIR.
# The cells below build paths as `resources_dir + "<file name>"`, so joining with ""
# guarantees a trailing path separator even when the override is given without one.
resources_dir = os.path.join(
    os.getenv("PYNVIMGCODEC_EXAMPLES_RESOURCES_DIR", "../assets/images/"), "")

Import nvImageCodec module and create Decoder and Encoder

[3]:
# nvimgcodec is imported from the `nvidia` package.
from nvidia import nvimgcodec
# Default-constructed decoder and encoder; these two instances are reused by the cells below.
decoder = nvimgcodec.Decoder()
encoder = nvimgcodec.Encoder()

Load and decode Jpeg image with nvImageCodec

[4]:
# Read the encoded JPEG bytes from disk, then hand the in-memory buffer to the decoder.
jpeg_path = resources_dir + "tabby_tiger_cat.jpg"
with open(jpeg_path, 'rb') as in_file:
    data = in_file.read()
nv_img_cat = decoder.decode(data)

Save image to bmp file with nvImageCodec

[5]:
# Encode to an in-memory BMP stream first and only then open the output file, so that
# an exception raised while encoding cannot leave an empty "cat-jpg-o.bmp" behind.
data = encoder.encode(nv_img_cat, "bmp")
with open("cat-jpg-o.bmp", 'wb') as out_file:
    out_file.write(data)

Read back with OpenCV the bmp image just saved with nvImageCodec

[6]:
# Load the file with OpenCV and convert BGR -> RGB before handing it to matplotlib.
cv_img_bmp = cv2.cvtColor(cv2.imread("cat-jpg-o.bmp"), cv2.COLOR_BGR2RGB)
show_image(cv_img_bmp)
../_images/samples_nvimgcodec_11_0.png

Load and decode Jpeg2000 (in jp2 container) image with nvImageCodec in one read function

[7]:
# read() takes a file path and returns the decoded image, replacing the explicit
# open()/decode() pair used for the JPEG above.
nv_img = decoder.read(resources_dir + "cat-1046544_640.jp2")

Save image to jpg file with nvImageCodec in one write function

[8]:
# write() encodes and saves in one call; its return value (shown as the cell output)
# is the name of the written file.
encoder.write("cat-jp2-o.jpg", nv_img)
[8]:
'cat-jp2-o.jpg'

Read back with OpenCV the jpg image just saved with nvImageCodec

[9]:
# Load the file with OpenCV and convert BGR -> RGB before handing it to matplotlib.
image = cv2.cvtColor(cv2.imread("cat-jp2-o.jpg"), cv2.COLOR_BGR2RGB)
show_image(image)
../_images/samples_nvimgcodec_17_0.png

Load jpg with nvImageCodec

[10]:
# Decode the JPEG straight from its path; the next cell re-encodes this image as JPEG 2000.
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

Save as Jpeg 2000 with nvImageCodec

[11]:
# File variant: no codec is passed, so the format presumably follows from the ".j2k" file name.
encoder.write("cat-jpg-o.j2k", nv_img_jpg)

# In-memory variant: encode() returns the encoded stream instead of writing a file.
cs = encoder.encode(nv_img_jpg, codec=".j2k")

Read back with OpenCV the j2k image just saved with nvImageCodec

[12]:
# Load the file with OpenCV and convert BGR -> RGB before handing it to matplotlib.
image = cv2.cvtColor(cv2.imread("cat-jpg-o.j2k"), cv2.COLOR_BGR2RGB)
show_image(image)
../_images/samples_nvimgcodec_23_0.png

Passing decoding parameters#

Decode a jpeg that carries an EXIF orientation tag - by default the decoder applies the EXIF orientation

[13]:
# No DecodeParams are passed here, so the decoder's default EXIF handling is used.
nv_img_jpg = decoder.read(resources_dir + "f-exif-8.jpg")
# Save as BMP and load it back with OpenCV for display.
encoder.write("f-exif-8.bmp", nv_img_jpg)
image = cv2.cvtColor(cv2.imread("f-exif-8.bmp"), cv2.COLOR_BGR2RGB)
show_image(image, title="With EXIF orientation applied")
../_images/samples_nvimgcodec_26_0.png

Let's assume we would like to ignore the EXIF orientation

[14]:
# Same file as before, but with EXIF orientation handling switched off via DecodeParams.
exif_path = resources_dir + "f-exif-8.jpg"
dec_params = nvimgcodec.DecodeParams(apply_exif_orientation=False)
nv_img_jpg = decoder.read(exif_path, params=dec_params)
# Save as BMP and load it back with OpenCV for display.
encoder.write("f-wo-exif.bmp", nv_img_jpg)
image = cv2.cvtColor(cv2.imread("f-wo-exif.bmp"), cv2.COLOR_BGR2RGB)
show_image(image, title="Without EXIF orientation")
../_images/samples_nvimgcodec_28_0.png

Passing encoding parameters#

Changing quality and chroma subsampling in jpeg

[15]:
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Quality 5 combined with the CSS_GRAY chroma subsampling mode.
enc_params = nvimgcodec.EncodeParams(
    quality_type=nvimgcodec.QualityType.QUALITY,
    quality_value=5,
    chroma_subsampling=nvimgcodec.ChromaSubsampling.CSS_GRAY,
)
encoder.write("cat-q5-gray.jpg", nv_img_jpg, params=enc_params)

image = cv2.cvtColor(cv2.imread("cat-q5-gray.jpg"), cv2.COLOR_BGR2RGB)
show_image(image, title="Quality=5, Grayscale")
../_images/samples_nvimgcodec_31_0.png

Jpeg optimized huffman and progressive encoding

[16]:
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Baseline: quality 75 with no JPEG-specific options.
baseline_params = nvimgcodec.EncodeParams(
    quality_type=nvimgcodec.QualityType.QUALITY,
    quality_value=75,
)
encoder.write("cat-q75.jpg", nv_img_jpg, params=baseline_params)

# Same quality, with optimized Huffman tables and progressive encoding both switched on.
tuned_params = nvimgcodec.EncodeParams(
    quality_type=nvimgcodec.QualityType.QUALITY,
    quality_value=75,
    jpeg_encode_params=nvimgcodec.JpegEncodeParams(optimized_huffman=True, progressive=True),
)
encoder.write("cat-q75-optimized_huffman.jpg", nv_img_jpg, params=tuned_params)

# Compare the sizes of the two files on disk.
print("default huffman file size:", os.path.getsize("cat-q75.jpg"))
print("optimized huffman file size:", os.path.getsize("cat-q75-optimized_huffman.jpg"))

image = cv2.cvtColor(cv2.imread("cat-q75-optimized_huffman.jpg"), cv2.COLOR_BGR2RGB)
show_image(image, title="Optimized Huffman")

default huffman file size: 69729
optimized huffman file size: 66722
../_images/samples_nvimgcodec_33_1.png

Jpeg2000 encode options overview

[17]:
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Lossy: quality expressed as a PSNR target of 25.
encoder.write("cat-psnr25.j2k", nv_img_jpg, params=nvimgcodec.EncodeParams(quality_type=nvimgcodec.QualityType.PSNR, quality_value=25))

# Lossless with no JPEG 2000-specific options.
encoder.write("cat-lossless.j2k", nv_img_jpg, params=nvimgcodec.EncodeParams(quality_type=nvimgcodec.QualityType.LOSSLESS))

# Explicit JPEG 2000 options, passed to the encoder through EncodeParams.jpeg2k_encode_params.
jpeg2k_encode_params = nvimgcodec.Jpeg2kEncodeParams()
# NOTE(review): the output name and the printed label say "2 decomposition levels" while
# the field set here is num_resolutions - confirm the two terms are meant to be equivalent.
jpeg2k_encode_params.num_resolutions = 2
jpeg2k_encode_params.code_block_size = (32, 32)
# NOTE(review): the bitstream type is JP2 while the output files use the .j2k extension -
# confirm this combination is intended.
jpeg2k_encode_params.bitstream_type = nvimgcodec.Jpeg2kBitstreamType.JP2
jpeg2k_encode_params.prog_order = nvimgcodec.Jpeg2kProgOrder.LRCP
# presumably 1 enables the multi-component (color) transform - TODO confirm against the API docs
jpeg2k_encode_params.mct_mode = 1
encoder.write("cat-lossless-2decomps.j2k",
              nv_img_jpg,
              params=nvimgcodec.EncodeParams(
                quality_type=nvimgcodec.QualityType.LOSSLESS,
                jpeg2k_encode_params=jpeg2k_encode_params
              )
            )

# Reuse the same params object, now with `ht` enabled (labelled "High-Throughput" in the
# printout below); every other option stays as set above.
jpeg2k_encode_params.ht = True
encoder.write("cat-lossless-2decomps-ht.j2k", nv_img_jpg, params=nvimgcodec.EncodeParams(
    quality_type=nvimgcodec.QualityType.LOSSLESS,
    jpeg2k_encode_params=jpeg2k_encode_params
))

# Compare the sizes of the four files written above.
print("lossy file size:", os.path.getsize("cat-psnr25.j2k"))
print("lossless file size:", os.path.getsize("cat-lossless.j2k"))
print("lossless 2 decomposition levels file size:",  os.path.getsize("cat-lossless-2decomps.j2k"))
print("lossless 2 decomposition levels with High-Throughput file size:",  os.path.getsize("cat-lossless-2decomps-ht.j2k"))
# Display the lossy result.
image = cv2.imread("cat-psnr25.j2k")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
show_image(image, title="JPEG2000 PSNR=25")
lossy file size: 2467
lossless file size: 598108
lossless 2 decomposition levels file size: 424868
lossless 2 decomposition levels with High-Throughput file size: 609293
../_images/samples_nvimgcodec_35_1.png

We can specify allowed backends used for decoding

[18]:
# Backends may be given either as Backend objects (which accept extra options such as
# load_hint) or as bare BackendKind values.
# NOTE(review): the exact meaning of load_hint=0.5 is not visible here - confirm in the API docs.
gpu_dec_backends = [
    nvimgcodec.Backend(nvimgcodec.BackendKind.GPU_ONLY, load_hint=0.5),
    nvimgcodec.BackendKind.HYBRID_CPU_GPU,
]
gpu_dec = nvimgcodec.Decoder(backends=gpu_dec_backends)
cpu_dec = nvimgcodec.Decoder(backends=[nvimgcodec.BackendKind.CPU_ONLY])
[19]:
%%time
# Decode with the CPU-only decoder; %%time reports the cost of this single run.
# NOTE(review): a single run may include one-time initialization - consider %%timeit for a fairer comparison.
nv_img_j2k = cpu_dec.read(resources_dir + "cat-1046544_640.jp2")
CPU times: user 85.2 ms, sys: 4.98 ms, total: 90.2 ms
Wall time: 90 ms
[20]:
%%time
# Decode the same file with the GPU-backed decoder; %%time reports the cost of this single run.
# NOTE(review): a single run may include one-time initialization - consider %%timeit for a fairer comparison.
nv_img_j2k = gpu_dec.read(resources_dir + "cat-1046544_640.jp2")

CPU times: user 26.9 ms, sys: 0 ns, total: 26.9 ms
Wall time: 26.5 ms

In the same way, we can create an Encoder with allowed backends.

[21]:
# Encoder backends are selected exactly like decoder backends.
gpu_enc_backends = [
    nvimgcodec.Backend(nvimgcodec.BackendKind.GPU_ONLY, load_hint=0.5),
    nvimgcodec.Backend(nvimgcodec.BackendKind.HYBRID_CPU_GPU),
]
gpu_enc = nvimgcodec.Encoder(backends=gpu_enc_backends)
cpu_enc = nvimgcodec.Encoder(backends=[nvimgcodec.BackendKind.CPU_ONLY])
[22]:
# Encode the image decoded above with the CPU-only encoder; the cell output is the written file name.
cpu_enc.write("cat_cpu_out.jpg", nv_img_j2k)
[22]:
'cat_cpu_out.jpg'
[23]:
# Encode the same image with the GPU-backed encoder; the cell output is the written file name.
gpu_enc.write("cat_gpu_out.jpg", nv_img_j2k)
[23]:
'cat_gpu_out.jpg'

Support of __cuda_array_interface__#

[24]:
# The decoded image exposes the CUDA Array Interface dictionary and a matching `shape` attribute.
print(nv_img_j2k.__cuda_array_interface__)
print(nv_img_j2k.shape)
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (13425796096, False), 'version': 3, 'stream': 1}
(475, 640, 3)

Support of __array_interface__#

It is possible to pass a host ndarray to nvImageCodec through any object that supports __array_interface__, for example an image (numpy.ndarray) created by OpenCV

[25]:
# Load with OpenCV and convert BGR -> RGB so the array handed to nvImageCodec is RGB.
cv_img = cv2.cvtColor(cv2.imread(resources_dir + "Weimaraner.bmp"), cv2.COLOR_BGR2RGB)

print(type(cv_img))
print(cv_img.__array_interface__)

# as_image() turns the numpy array into an nvImageCodec Image that the encoder accepts.
nv_h_img = nvimgcodec.as_image(cv_img)
gpu_enc.write("Weimaraner_ai_out.jpg", nv_h_img)

image = cv2.cvtColor(cv2.imread("Weimaraner_ai_out.jpg"), cv2.COLOR_BGR2RGB)
show_image(image, title="Array Interface")

<class 'numpy.ndarray'>
{'data': (131389296, False), 'strides': None, 'descr': [('', '|u1')], 'typestr': '|u1', 'shape': (720, 720, 3), 'version': 3}
../_images/samples_nvimgcodec_48_1.png

If we use the cpu() method of an Image object, it creates a new Image whose content is copied to a host buffer.

[26]:
nv_img = cpu_dec.read(resources_dir + "cat-1046544_640.jp2")
# cpu() returns a new Image whose content is copied to a host buffer.
nv_h_img = nv_img.cpu()

Image with host buffer supports __array_interface__

[27]:
# A host-backed Image exposes the NumPy array interface dictionary.
print(nv_h_img.__array_interface__)
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (140090959462400, False), 'version': 3}

but can’t return a proper __cuda_array_interface__

[28]:
# Reading __cuda_array_interface__ on a host-backed image raises RuntimeError; the error is
# caught so the notebook keeps running and the message itself is shown as the demonstration.
try:
    print(nv_h_img.__cuda_array_interface__)
except RuntimeError as e:
    print(f"Expected error: {e}")
Expected error: Unable to initialize __cuda_array_interface__: Image buffer is not on device (expected device buffer for __cuda_array_interface__). Call '.cuda()' on the image to obtain a device-backed image before using the CUDA array interface.

We can pass such Image to functions which accept and can use this interface like imshow from matplotlib library

[29]:
# show_image() forwards the host Image to plt.imshow without any explicit conversion.
show_image(nv_h_img, title="Image with array interface")
../_images/samples_nvimgcodec_56_0.png

We can also create a zero-copy view of this image with numpy and process it with OpenCV

[30]:
# View the host image as a numpy array (described above as a zero-copy view).
np_img = np.asarray(nv_h_img)
# 5x5 box filter: every tap is 1/25, so each output pixel is the mean of its 5x5 neighbourhood.
kernel = np.ones((5, 5), np.float32) / 25
dst = cv2.filter2D(np_img, -1, kernel)

# Explicit axes instead of tuple-expression statements acting on the implicit pyplot state.
fig, axes = plt.subplots(1, 2, figsize=(8, 3), dpi=72)
for ax, (panel, caption) in zip(axes, ((np_img, 'Original'), (dst, 'Averaging'))):
    ax.imshow(panel)
    ax.set_title(caption)
    # Hide the ticks but keep the frame, as the original xticks([])/yticks([]) calls did.
    ax.set_xticks([])
    ax.set_yticks([])
plt.tight_layout()
plt.show()

../_images/samples_nvimgcodec_58_0.png

There is also method cuda() which can be used to convert an Image with a host buffer to an Image with copied contents to a device buffer.

[31]:
# cuda() returns an Image backed by a device buffer, so the CUDA Array Interface is available again.
nv_new_cuda_img = nv_h_img.cuda()
print(nv_new_cuda_img.__cuda_array_interface__)

{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (13423328256, False), 'version': 3, 'stream': 1}

We can check whether Image keeps a host or a device buffer by reading the buffer_kind property

[32]:
# buffer_kind reports whether the Image holds a host buffer or a device buffer.
print("Host image buffer kind: ", nv_h_img.buffer_kind)
print("Device image buffer kind: ", nv_new_cuda_img.buffer_kind)
Host image buffer kind:  ImageBufferKind.STRIDED_HOST
Device image buffer kind:  ImageBufferKind.STRIDED_DEVICE

Managing lifetime of decoder resources using “with” statement

[33]:
# The `with` block scopes decoder_2; decoding and display both happen inside it.
with nvimgcodec.Decoder() as decoder_2:
    nv_img = decoder_2.read(resources_dir + "cat-1046544_640.jp2")
    # .cpu() gives matplotlib a host-side copy of the image to draw.
    show_image(nv_img.cpu(), title="Context manager decode")
../_images/samples_nvimgcodec_64_0.png

Similarly for encoder resources

[34]:
# nv_img is the image decoded in the previous cell; encoder_2 is scoped to this `with` block.
with nvimgcodec.Encoder() as encoder_2:
    encoder_2.write("cat-1046544_640_out.jpg", nv_img)
    # Read the written file back with OpenCV (BGR -> RGB) to display it.
    image = cv2.imread("cat-1046544_640_out.jpg")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    show_image(image, title="Context manager encode")
../_images/samples_nvimgcodec_66_0.png

Parsing image information without decoding#

nvImageCodec can parse image information without decoding the image. For this, we use the CodeStream entity.

[35]:
# Build a CodeStream from a file path; printing it lists the parsed image information.
stream = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(stream)
CodeStream( codec_name=jpeg2k num_images=1 height=475 width=640 num_channels=3 dtype=uint8 precision=8 color_spec=1 sample_format=7 size=558133 capacity=558133 num_tiles_y=1 num_tiles_x=1 tile_height=475 tile_width=640)

A CodeStream object can also be created from an in-memory encoded stream (bytes or numpy array)

[36]:
# Read the encoded bytes from disk, then build the CodeStream from the in-memory buffer.
jp2_path = resources_dir + "cat-1046544_640.jp2"
with open(jp2_path, 'rb') as in_file:
    data = in_file.read()
stream = nvimgcodec.CodeStream(data)
print(stream)
CodeStream( codec_name=jpeg2k num_images=1 height=475 width=640 num_channels=3 dtype=uint8 precision=8 color_spec=1 sample_format=7 size=558133 capacity=558133 num_tiles_y=1 num_tiles_x=1 tile_height=475 tile_width=640)
[37]:
# NOTE(review): height * width * num_channels counts samples (pixels times channels),
# so the word "pixels" in the message below is loose wording.
total_values = stream.height * stream.width * stream.num_channels
print(
    f"Image has dimensions {stream.height}x{stream.width}x{stream.num_channels} "
    f"({total_values} total number of pixels) "
    f"structured in {stream.tile_height}x{stream.tile_width} tiles"
)
Image has dimensions 475x640x3 (912000 total number of pixels) structured in 475x640 tiles

Inspect color specification properties for information about the color space of images.

[38]:
# color_spec is read from each parsed CodeStream; no image is decoded in this cell.
jpeg_stream = nvimgcodec.CodeStream(resources_dir + "tabby_tiger_cat.jpg")
print(f"JPEG color spec: {jpeg_stream.color_spec}")

jp2_stream_rgb = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"JPEG2000 with sRGB color spec: {jp2_stream_rgb.color_spec}")

# This input is a TIFF file, so the variable is named for its real format
# (it was previously called jp2_stream_ycbcr).
tiff_stream_ycbcr = nvimgcodec.CodeStream(resources_dir + "cat-300572_640_ycbcr.tiff")
print(f"Tiff with YCbCr color spec: {tiff_stream_ycbcr.color_spec}")

JPEG color spec: ColorSpec.SYCC
JPEG2000 with sRGB color spec: ColorSpec.SRGB
Tiff with YCbCr color spec: ColorSpec.SYCC

Using Color Spec when decoding#

The color_spec parameter can be used in DecodeParams to control how images are processed during decoding.

[39]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Default decode - equivalent to passing
# params=nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.SRGB)
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")

# Request a grayscale conversion during decoding.
gray_params = nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.GRAY)
img_gray = decoder.decode(cs, params=gray_params)
print(f"Grayscale decode color spec: {img_gray.color_spec}")

# Side-by-side visual comparison: (image, caption, colormap) for each panel.
fig, axes = plt.subplots(1, 2, figsize=(8, 3), dpi=72)
panels = (
    (img_srgb, "Decoded as sRGB (default)", None),
    (img_gray, "Decoded as Grayscale", 'gray'),
)
for ax, (img, caption, cmap) in zip(axes, panels):
    ax.imshow(img.cpu(), cmap=cmap)
    ax.set_title(caption)
    ax.axis('off')
plt.tight_layout()
plt.show()

Color spec of the source: ColorSpec.SRGB
sRGB decode color spec: ColorSpec.SRGB
Grayscale decode color spec: ColorSpec.GRAY
../_images/samples_nvimgcodec_75_1.png

Decoding with the UNCHANGED color spec preserves the original color space. Note that for GRAY input, decoding with UNCHANGED yields a different shape (only one channel) than the default sRGB decode.

[40]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-111793_640-16bit-gray.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Default decode (no params passed).
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")
print(f"Default sRGB decoded shape: {img_srgb.shape}")

# Ask the decoder to keep the color space found in the stream.
unchanged_params = nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.UNCHANGED)
img_unchanged = decoder.decode(cs, params=unchanged_params)
print(f"Unchanged decode color spec: {img_unchanged.color_spec}")
print(f"Unchanged decoded shape: {img_unchanged.shape}")


Color spec of the source: ColorSpec.GRAY
sRGB decode color spec: ColorSpec.SRGB
Default sRGB decoded shape: (426, 640, 3)
Unchanged decode color spec: ColorSpec.GRAY
Unchanged decoded shape: (426, 640, 1)

For RGB input, decoding with the UNCHANGED color spec should not change anything.

[41]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Default decode (no params passed).
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")
print(f"Default sRGB decoded shape: {img_srgb.shape}")

# Ask the decoder to keep the color space found in the stream.
unchanged_params = nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.UNCHANGED)
img_unchanged = decoder.decode(cs, params=unchanged_params)
print(f"Unchanged decode color spec: {img_unchanged.color_spec}")
print(f"Unchanged decoded shape: {img_unchanged.shape}")
Color spec of the source: ColorSpec.SRGB
sRGB decode color spec: ColorSpec.SRGB
Default sRGB decoded shape: (475, 640, 3)
Unchanged decode color spec: ColorSpec.SRGB
Unchanged decoded shape: (475, 640, 3)

Encode Jpeg2000 with tiles#

[42]:
# Encode JPEG2000 with tiling (e.g., 256x256 tiles)

# Load the source image in this cell so it does not depend on a variable left over
# from an earlier section of the notebook.
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

jpeg2k_encode_params = nvimgcodec.Jpeg2kEncodeParams()
jpeg2k_encode_params.num_resolutions = 5
jpeg2k_encode_params.code_block_size = (64, 64)
# NOTE(review): the bitstream type is JP2 while the output file uses the .j2k extension -
# confirm this combination is intended.
jpeg2k_encode_params.bitstream_type = nvimgcodec.Jpeg2kBitstreamType.JP2
jpeg2k_encode_params.prog_order = nvimgcodec.Jpeg2kProgOrder.RPCL

# Set tile size (e.g., 256x256)
tile_width = 256
tile_height = 256

# The tile size is passed on EncodeParams itself, next to the JPEG 2000-specific options.
encoder.write(
    "cat-tiled-256x256.j2k",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(
        quality_type=nvimgcodec.QualityType.LOSSLESS,
        jpeg2k_encode_params=jpeg2k_encode_params,
        tile_width=tile_width,
        tile_height=tile_height
    )
)

# Parse information about the tiled JPEG2000 image (no decoding happens here)
code_stream = nvimgcodec.CodeStream("cat-tiled-256x256.j2k")
print(f"Tile width: {code_stream.tile_width}")
print(f"Tile height: {code_stream.tile_height}")
print(f"Number of tiles (X): {code_stream.num_tiles_x}")
print(f"Number of tiles (Y): {code_stream.num_tiles_y}")

# Decode the tiled JPEG2000 image
image_tiled = decoder.decode(code_stream)
show_image(image_tiled.cpu(), title=f"JPEG2000 Tiled ({tile_width}x{tile_height})")
plt.show()

Tile width: 256
Tile height: 256
Number of tiles (X): 3
Number of tiles (Y): 3
../_images/samples_nvimgcodec_81_1.png