nvImageCodec examples#

[1]:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt

Setting resource folder

[2]:
# Folder with the sample images; override it with PYNVIMGCODEC_EXAMPLES_RESOURCES_DIR.
# The cells below build paths as `resources_dir + "<file name>"`, so the value must end
# with a path separator. os.path.join(path, "") appends one only when it is missing,
# which keeps an override such as "/data/images" working.
resources_dir = os.path.join(
    os.getenv("PYNVIMGCODEC_EXAMPLES_RESOURCES_DIR", "../assets/images/"), "")

Import nvImageCodec module and create Decoder and Encoder

[3]:
from nvidia import nvimgcodec
# Default-constructed decoder and encoder; the cells below reuse these two objects
decoder = nvimgcodec.Decoder()
encoder = nvimgcodec.Encoder()

Load and decode Jpeg image with nvImageCodec

[4]:
# Pull the encoded JPEG bytes into memory, then decode once the file is closed
with open(resources_dir + "tabby_tiger_cat.jpg", 'rb') as in_file:
    data = in_file.read()
nv_img_cat = decoder.decode(data)

Save image to bmp file with nvImageCodec

[5]:
# Encode before opening the output: open(..., 'wb') creates/truncates the file
# immediately, so encoding first avoids leaving an empty cat-jpg-o.bmp behind
# when the encode step fails.
data = encoder.encode(nv_img_cat, "bmp")
with open("cat-jpg-o.bmp", 'wb') as out_file:
    out_file.write(data)

Read back with OpenCV the bmp image just saved with nvImageCodec

[6]:
# The file is loaded with OpenCV and converted BGR -> RGB before matplotlib shows it
cv_img_bmp = cv2.cvtColor(cv2.imread("cat-jpg-o.bmp"), cv2.COLOR_BGR2RGB)
plt.imshow(cv_img_bmp)
[6]:
<matplotlib.image.AxesImage at 0x7f71a6cc3d30>
../_images/samples_nvimgcodec_11_1.png

Load and decode Jpeg2000 (in jp2 container) image with nvImageCodec in one read function

[7]:
# read() takes the file path and returns the decoded image in a single call
nv_img = decoder.read(resources_dir + "cat-1046544_640.jp2")

Save image to jpg file with nvImageCodec in one write function

[8]:
# write() encodes and saves in a single call; the file name echoed below is its return value
encoder.write("cat-jp2-o.jpg", nv_img)
[8]:
'cat-jp2-o.jpg'

Read back with OpenCV the jpg image just saved with nvImageCodec

[9]:
# Load the saved jpg with OpenCV and convert BGR -> RGB for matplotlib
image = cv2.cvtColor(cv2.imread("cat-jp2-o.jpg"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[9]:
<matplotlib.image.AxesImage at 0x7f718a705810>
../_images/samples_nvimgcodec_17_1.png

Load jpg with nvImageCodec

[10]:
# Decode the JPEG sample; nv_img_jpg is the input of the Jpeg 2000 encode in the next cell
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

Save as Jpeg 2000 with nvImageCodec

[11]:
# Encode and save to a .j2k file in one call ...
encoder.write("cat-jpg-o.j2k", nv_img_jpg)

# ... or encode without writing a file, selecting the codec via the ".j2k" extension string.
# `cs` is not read again before a later cell reassigns it.
cs = encoder.encode(nv_img_jpg, codec=".j2k")

Read back with OpenCV the j2k image just saved with nvImageCodec

[12]:
# Load the saved j2k with OpenCV and convert BGR -> RGB for matplotlib
image = cv2.cvtColor(cv2.imread("cat-jpg-o.j2k"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[12]:
<matplotlib.image.AxesImage at 0x7f718a773550>
../_images/samples_nvimgcodec_23_1.png

Passing decoding parameters#

Decode jpeg with Exif orientation - by default it applies exif orientation

[13]:
# Decode without explicit params, save as bmp, then display the saved file
nv_img_jpg = decoder.read(resources_dir + "f-exif-8.jpg")
encoder.write("f-exif-8.bmp", nv_img_jpg)

image = cv2.cvtColor(cv2.imread("f-exif-8.bmp"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[13]:
<matplotlib.image.AxesImage at 0x7f718a634280>
../_images/samples_nvimgcodec_26_1.png

Let's assume we would like to ignore the Exif orientation

[14]:
# Same file as the previous cell, decoded with apply_exif_orientation switched off
dec_params = nvimgcodec.DecodeParams(apply_exif_orientation=False)
nv_img_jpg = decoder.read(resources_dir + "f-exif-8.jpg", params=dec_params)
encoder.write("f-wo-exif.bmp", nv_img_jpg)

image = cv2.cvtColor(cv2.imread("f-wo-exif.bmp"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[14]:
<matplotlib.image.AxesImage at 0x7f718a68ee90>
../_images/samples_nvimgcodec_28_1.png

Passing encoding parameters#

Changing quality and chroma subsampling in jpeg

[15]:
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Quality 5 with CSS_GRAY chroma subsampling
enc_params = nvimgcodec.EncodeParams(
    quality_type=nvimgcodec.QualityType.QUALITY,
    quality_value=5,
    chroma_subsampling=nvimgcodec.ChromaSubsampling.CSS_GRAY,
)
encoder.write("cat-q5-gray.jpg", nv_img_jpg, params=enc_params)

image = cv2.cvtColor(cv2.imread("cat-q5-gray.jpg"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[15]:
<matplotlib.image.AxesImage at 0x7f718811c6a0>
../_images/samples_nvimgcodec_31_1.png

Jpeg optimized huffman and progressive encoding

[16]:
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Baseline: quality 75 without any JpegEncodeParams
encoder.write(
    "cat-q75.jpg",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(
        quality_type=nvimgcodec.QualityType.QUALITY,
        quality_value=75,
    ),
)

# Same quality with optimized_huffman and progressive both enabled
encoder.write(
    "cat-q75-optimized_huffman.jpg",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(
        quality_type=nvimgcodec.QualityType.QUALITY,
        quality_value=75,
        jpeg_encode_params=nvimgcodec.JpegEncodeParams(optimized_huffman=True, progressive=True),
    ),
)

# Compare the sizes of the two files on disk
print("default huffman file size:", os.path.getsize("cat-q75.jpg"))
print("optimized huffman file size:", os.path.getsize("cat-q75-optimized_huffman.jpg"))

image = cv2.cvtColor(cv2.imread("cat-q75-optimized_huffman.jpg"), cv2.COLOR_BGR2RGB)
plt.imshow(image)

default huffman file size: 69729
optimized huffman file size: 66722
[16]:
<matplotlib.image.AxesImage at 0x7f71881b6410>
../_images/samples_nvimgcodec_33_2.png

Jpeg2000 encode options overview

[17]:
# Encode the same decoded image four times with different Jpeg2000 settings,
# then compare the resulting file sizes.
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Lossy: quality_type PSNR with quality_value 25
encoder.write("cat-psnr25.j2k", nv_img_jpg, params=nvimgcodec.EncodeParams(quality_type=nvimgcodec.QualityType.PSNR, quality_value=25))

# Lossless without explicit Jpeg2kEncodeParams
encoder.write("cat-lossless.j2k", nv_img_jpg, params=nvimgcodec.EncodeParams(quality_type=nvimgcodec.QualityType.LOSSLESS))

# Explicit Jpeg2000 options; this one object is reused by the two writes below
jpeg2k_encode_params = nvimgcodec.Jpeg2kEncodeParams()
jpeg2k_encode_params.num_resolutions = 2
jpeg2k_encode_params.code_block_size = (32, 32)
# NOTE(review): bitstream_type is JP2 while the output files use a .j2k extension — confirm this is intended
jpeg2k_encode_params.bitstream_type = nvimgcodec.Jpeg2kBitstreamType.JP2
jpeg2k_encode_params.prog_order = nvimgcodec.Jpeg2kProgOrder.LRCP
jpeg2k_encode_params.mct_mode = 1
encoder.write("cat-lossless-2decomps.j2k",
              nv_img_jpg,
              params=nvimgcodec.EncodeParams(
                quality_type=nvimgcodec.QualityType.LOSSLESS,
                jpeg2k_encode_params=jpeg2k_encode_params
              )
            )

# Set `ht` on the same params object (the label printed below calls this
# High-Throughput) and encode once more with otherwise identical options
jpeg2k_encode_params.ht = True
encoder.write("cat-lossless-2decomps-ht.j2k", nv_img_jpg, params=nvimgcodec.EncodeParams(
    quality_type=nvimgcodec.QualityType.LOSSLESS,
    jpeg2k_encode_params=jpeg2k_encode_params
))

# Report the on-disk size of each variant
print("lossy file size:", os.path.getsize("cat-psnr25.j2k"))
print("lossless file size:", os.path.getsize("cat-lossless.j2k"))
print("lossless 2 decomposition levels file size:",  os.path.getsize("cat-lossless-2decomps.j2k"))
print("lossless 2 decomposition levels with High-Throughput file size:",  os.path.getsize("cat-lossless-2decomps-ht.j2k"))
# Display the lossy result
image = cv2.imread("cat-psnr25.j2k")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
plt.imshow(image)
lossy file size: 2467
lossless file size: 598108
lossless 2 decomposition levels file size: 424868
lossless 2 decomposition levels with High-Throughput file size: 609293
[17]:
<matplotlib.image.AxesImage at 0x7f718a6521a0>
../_images/samples_nvimgcodec_35_2.png

We can specify allowed backends used for decoding

[18]:
# Decoder restricted to GPU_ONLY (with load_hint=0.5) and HYBRID_CPU_GPU backends.
# Every entry is an explicit nvimgcodec.Backend object, the same form gpu_enc uses,
# instead of mixing a Backend object with a bare BackendKind in one list.
gpu_dec = nvimgcodec.Decoder(backends=[
    nvimgcodec.Backend(nvimgcodec.BackendKind.GPU_ONLY, load_hint=0.5),
    nvimgcodec.Backend(nvimgcodec.BackendKind.HYBRID_CPU_GPU),
])
# Decoder restricted to the CPU_ONLY backend
cpu_dec = nvimgcodec.Decoder(backends=[nvimgcodec.BackendKind.CPU_ONLY])
[19]:
%%time
# Timed decode with the CPU_ONLY decoder
nv_img_j2k = cpu_dec.read(resources_dir + "cat-1046544_640.jp2")
CPU times: user 96.4 ms, sys: 668 μs, total: 97.1 ms
Wall time: 96.8 ms
[20]:
%%time
# Timed decode of the same file with the GPU_ONLY / HYBRID_CPU_GPU decoder
nv_img_j2k = gpu_dec.read(resources_dir + "cat-1046544_640.jp2")

CPU times: user 27.2 ms, sys: 0 ns, total: 27.2 ms
Wall time: 26.5 ms

The same way we can create Encoder with allowed backends.

[21]:
# Encoder restricted to GPU_ONLY (with load_hint=0.5) and HYBRID_CPU_GPU backends
gpu_enc = nvimgcodec.Encoder(backends=[
    nvimgcodec.Backend(nvimgcodec.BackendKind.GPU_ONLY, load_hint=0.5),
    nvimgcodec.Backend(nvimgcodec.BackendKind.HYBRID_CPU_GPU),
])
# Encoder restricted to the CPU_ONLY backend
cpu_enc = nvimgcodec.Encoder(backends=[nvimgcodec.BackendKind.CPU_ONLY])
[22]:
# Encode the image decoded by gpu_dec above using the CPU_ONLY encoder
cpu_enc.write("cat_cpu_out.jpg", nv_img_j2k)
[22]:
'cat_cpu_out.jpg'
[23]:
# Encode the same image using the GPU_ONLY / HYBRID_CPU_GPU encoder
gpu_enc.write("cat_gpu_out.jpg", nv_img_j2k)
[23]:
'cat_gpu_out.jpg'

Support of __cuda_array_interface__#

[24]:
# Print the CUDA array interface dict and the shape of the image decoded by gpu_dec
print(nv_img_j2k.__cuda_array_interface__)
print(nv_img_j2k.shape)
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (13425796096, False), 'version': 3, 'stream': 1}
(475, 640, 3)

Support of __array_interface__#

A host ndarray can be passed to nvImageCodec through any object that supports __array_interface__, for example an image (numpy.ndarray) created by OpenCV

[25]:
# Load with OpenCV and convert BGR -> RGB
cv_img = cv2.cvtColor(cv2.imread(resources_dir + "Weimaraner.bmp"), cv2.COLOR_BGR2RGB)

print(type(cv_img))
print(cv_img.__array_interface__)

# Wrap the OpenCV array with as_image() and encode it with gpu_enc
nv_h_img = nvimgcodec.as_image(cv_img)
gpu_enc.write("Weimaraner_ai_out.jpg", nv_h_img)

# Read the encoded file back for display
image = cv2.cvtColor(cv2.imread("Weimaraner_ai_out.jpg"), cv2.COLOR_BGR2RGB)
plt.imshow(image)

<class 'numpy.ndarray'>
{'data': (118512496, False), 'strides': None, 'descr': [('', '|u1')], 'typestr': '|u1', 'shape': (720, 720, 3), 'version': 3}
[25]:
<matplotlib.image.AxesImage at 0x7f71647d0610>
../_images/samples_nvimgcodec_48_2.png

Calling the cpu() method of an Image object creates a new Image whose content is copied to a host buffer.

[26]:
# Decode with the CPU_ONLY decoder, then call cpu() to obtain the Image used as
# the host-buffer image (nv_h_img) in the cells below
nv_img = cpu_dec.read(resources_dir + "cat-1046544_640.jp2")
nv_h_img = nv_img.cpu()

Image with host buffer supports __array_interface__ (but can’t return a proper __cuda_array_interface__)

[27]:
# Print both interfaces of the host-buffer image.
# NOTE(review): in the recorded output __cuda_array_interface__ does not raise; it
# reports the same 'data' address as __array_interface__ — confirm this matches the
# "can't return a proper __cuda_array_interface__" statement in the text above.
print(nv_h_img.__array_interface__)
print(nv_h_img.__cuda_array_interface__)
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (140123238825984, False), 'version': 3}
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (140123238825984, False), 'version': 3, 'stream': 1}

so we can pass such Image to functions which accept and can use this interface like imshow from matplotlib library

[28]:
# The host-buffer Image is passed to matplotlib directly, without an explicit conversion
plt.imshow(nv_h_img)
[28]:
<matplotlib.image.AxesImage at 0x7f716461b820>
../_images/samples_nvimgcodec_54_1.png

We can also create a zero-copy view of this image with numpy and process it with OpenCV

[29]:
# View the host image as a numpy array and blur it with a 5x5 averaging kernel
np_img = np.asarray(nv_h_img)
kernel = np.ones((5, 5), np.float32)/25
dst = cv2.filter2D(np_img, -1, kernel)

# Draw both pictures side by side (subplot positions 121 and 122), ticks hidden
for position, (picture, caption) in enumerate(((np_img, 'Original'), (dst, 'Averaging')), start=121):
    plt.subplot(position)
    plt.imshow(picture)
    plt.title(caption)
    plt.xticks([])
    plt.yticks([])
plt.show()

../_images/samples_nvimgcodec_56_0.png

There is also method cuda() which can be used to convert an Image with a host buffer to an Image with copied contents to a device buffer.

[30]:
# Interface of the host-buffer image before the copy
print(nv_h_img.__cuda_array_interface__)
# cuda() returns a new Image; the 'data' address printed for it differs from the one above
nv_new_cuda_img = nv_h_img.cuda()
print(nv_new_cuda_img.__cuda_array_interface__)

{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (140123238825984, False), 'version': 3, 'stream': 1}
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (13423328256, False), 'version': 3, 'stream': 1}

We can check whether Image keeps a host or a device buffer by reading the buffer_kind property

[31]:
# buffer_kind reports STRIDED_HOST for the cpu() image and STRIDED_DEVICE for the cuda() image
print("Host image buffer kind: ", nv_h_img.buffer_kind)
print("Device image buffer kind: ", nv_new_cuda_img.buffer_kind)
Host image buffer kind:  ImageBufferKind.STRIDED_HOST
Device image buffer kind:  ImageBufferKind.STRIDED_DEVICE

Managing lifetime of decoder resources using “with” statement

[32]:
# Decoder used as a context manager: decoder_2 is created for this block only
with nvimgcodec.Decoder() as decoder_2:
    nv_img = decoder_2.read(resources_dir + "cat-1046544_640.jp2")
    # cpu() gives matplotlib a host-buffer copy to display
    plt.imshow(nv_img.cpu())
../_images/samples_nvimgcodec_62_0.png

Similarly for encoder resources

[33]:
# Encoder used as a context manager: encoder_2 is created for this block only
with nvimgcodec.Encoder() as encoder_2:
    encoder_2.write("cat-1046544_640_out.jpg", nv_img)
    # Read the written file back and convert BGR -> RGB for display
    image = cv2.cvtColor(cv2.imread("cat-1046544_640_out.jpg"), cv2.COLOR_BGR2RGB)
    plt.imshow(image)
../_images/samples_nvimgcodec_64_0.png

Parsing image information without decoding#

nvImageCodec can parse image information without decoding the image. For this, we use the CodeStream entity.

[34]:
# Build a CodeStream from a file path; printing it lists the parsed properties shown below
stream = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(stream)
CodeStream( codec_name=jpeg2k num_images=1 height=475 width=640 num_channels=3 dtype=uint8 precision=8 color_spec=1 sample_format=7 size=558133 capacity=558133 num_tiles_y=1 num_tiles_x=1 tile_height=475 tile_width=640)

A CodeStream object can also be created from an in-memory encoded stream (bytes or numpy array)

[35]:
# Read the encoded bytes, then build the CodeStream from them once the file is closed
with open(resources_dir + "cat-1046544_640.jp2", 'rb') as in_file:
    data = in_file.read()
stream = nvimgcodec.CodeStream(data)
print(stream)
CodeStream( codec_name=jpeg2k num_images=1 height=475 width=640 num_channels=3 dtype=uint8 precision=8 color_spec=1 sample_format=7 size=558133 capacity=558133 num_tiles_y=1 num_tiles_x=1 tile_height=475 tile_width=640)
[36]:
# NOTE(review): height * width * num_channels is the number of samples (one per channel
# per pixel); the pixel count would be height * width. The message text calls it "pixels".
print(f"Image has dimensions {stream.height}x{stream.width}x{stream.num_channels} ({stream.height * stream.width * stream.num_channels} total number of pixels) structured in {stream.tile_height}x{stream.tile_width} tiles")
Image has dimensions 475x640x3 (912000 total number of pixels) structured in 475x640 tiles

Inspect color specification properties for information about the color space of images.

[37]:
# Parse three files and print each stream's color_spec
jpeg_stream = nvimgcodec.CodeStream(resources_dir + "tabby_tiger_cat.jpg")
print(f"JPEG color spec: {jpeg_stream.color_spec}")

jp2_stream_rgb = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"JPEG2000 with sRGB color spec: {jp2_stream_rgb.color_spec}")

# NOTE(review): despite the jp2_ prefix, this stream is created from a .tiff file
jp2_stream_ycbcr  = nvimgcodec.CodeStream(resources_dir + "cat-300572_640_ycbcr.tiff")
print(f"Tiff with YCbCr color spec: {jp2_stream_ycbcr.color_spec}")

JPEG color spec: ColorSpec.SYCC
JPEG2000 with sRGB color spec: ColorSpec.SRGB
Tiff with YCbCr color spec: ColorSpec.SYCC

Using Color Spec when decoding#

The color_spec parameter can be used in DecodeParams to control how images are processed during decoding.

[38]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Decode with sRGB (default behavior) - equivalent to define params=nvimgcodec.DecodeParams(color_spec = nvimgcodec.ColorSpec.SRGB)
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")

# Demonstrate grayscale conversion
gray_params = nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.GRAY)
img_gray = decoder.decode(cs, params=gray_params)
print(f"Grayscale decode color spec: {img_gray.color_spec}")

# Show visual comparison: one panel per decoded image, axes hidden
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (img_srgb, "Decoded as sRGB (default)", {}),
    (img_gray, "Decoded as Grayscale", {"cmap": 'gray'}),
)
for ax, (decoded, caption, imshow_kwargs) in zip(axes, panels):
    ax.imshow(decoded.cpu(), **imshow_kwargs)
    ax.set_title(caption)
    ax.axis('off')
plt.tight_layout()
plt.show()

Color spec of the source: ColorSpec.SRGB
sRGB decode color spec: ColorSpec.SRGB
Grayscale decode color spec: ColorSpec.GRAY
../_images/samples_nvimgcodec_73_1.png

Decoding with UNCHANGED color spec (preserve the original color space). Please note that for GRAY input, decoding with the UNCHANGED color spec yields a different shape (only one channel).

[39]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-111793_640-16bit-gray.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Decode with sRGB (default behavior)
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")
print(f"Default sRGB decoded shape: {img_srgb.shape}")

# Decode the same stream again with color_spec set to UNCHANGED
unchanged_params = nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.UNCHANGED)
img_unchanged = decoder.decode(cs, params=unchanged_params)
print(f"Unchanged decode color spec: {img_unchanged.color_spec}")
print(f"Unchanged decoded shape: {img_unchanged.shape}")


Color spec of the source: ColorSpec.GRAY
sRGB decode color spec: ColorSpec.SRGB
Default sRGB decoded shape: (426, 640, 3)
Unchanged decode color spec: ColorSpec.GRAY
Unchanged decoded shape: (426, 640, 1)

For RGB input decoding with UNCHANGED color spec should not change anything.

[40]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Decode with sRGB (default behavior)
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")
print(f"Default sRGB decoded shape: {img_srgb.shape}")

# Decode the same stream again with color_spec set to UNCHANGED
unchanged_params = nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.UNCHANGED)
img_unchanged = decoder.decode(cs, params=unchanged_params)
print(f"Unchanged decode color spec: {img_unchanged.color_spec}")
print(f"Unchanged decoded shape: {img_unchanged.shape}")
Color spec of the source: ColorSpec.SRGB
sRGB decode color spec: ColorSpec.SRGB
Default sRGB decoded shape: (475, 640, 3)
Unchanged decode color spec: ColorSpec.SRGB
Unchanged decoded shape: (475, 640, 3)

Encode Jpeg2000 with tiles#

[41]:
# Encode JPEG2000 with tiling (e.g., 256x256 tiles)

# Decode the source image in this cell instead of relying on `nv_img_jpg` left over
# from a much earlier cell; this is the same file the earlier encode cells read.
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

jpeg2k_encode_params = nvimgcodec.Jpeg2kEncodeParams()
jpeg2k_encode_params.num_resolutions = 5
jpeg2k_encode_params.code_block_size = (64, 64)
# NOTE(review): bitstream_type is JP2 while the output file uses a .j2k extension — confirm this is intended
jpeg2k_encode_params.bitstream_type = nvimgcodec.Jpeg2kBitstreamType.JP2
jpeg2k_encode_params.prog_order = nvimgcodec.Jpeg2kProgOrder.RPCL

# Set tile size (e.g., 256x256)
tile_width = 256
tile_height = 256

encoder.write(
    "cat-tiled-256x256.j2k",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(
        quality_type=nvimgcodec.QualityType.LOSSLESS,
        jpeg2k_encode_params=jpeg2k_encode_params,
        tile_width=tile_width,
        tile_height=tile_height
    )
)

# Parse information about the tiled JPEG2000 image
code_stream = nvimgcodec.CodeStream("cat-tiled-256x256.j2k")
print(f"Tile width: {code_stream.tile_width}")
print(f"Tile height: {code_stream.tile_height}")
print(f"Number of tiles (X): {code_stream.num_tiles_x}")
print(f"Number of tiles (Y): {code_stream.num_tiles_y}")

# Decode the tiled JPEG2000 image
image_tiled = decoder.decode(code_stream)

# Visualize the tiled JPEG2000 image
plt.imshow(image_tiled.cpu())
plt.title(f"JPEG2000 Tiled ({tile_width}x{tile_height})")
plt.show()

Tile width: 256
Tile height: 256
Number of tiles (X): 3
Number of tiles (Y): 3
../_images/samples_nvimgcodec_79_1.png