nvImageCodec examples#
[1]:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
Setting resource folder
[2]:
# Folder holding the sample images; it can be overridden through the
# PYNVIMGCODEC_EXAMPLES_RESOURCES_DIR environment variable.
# The cells below build paths as `resources_dir + "<file name>"`, so guarantee a
# trailing path separator even when the override omits it
# (os.path.join(path, "") appends a separator only if one is missing).
resources_dir = os.path.join(
    os.getenv("PYNVIMGCODEC_EXAMPLES_RESOURCES_DIR", "../assets/images/"), "")
Import nvImageCodec module and create Decoder and Encoder
[3]:
from nvidia import nvimgcodec
# Default-constructed Decoder/Encoder pair reused by the cells below
# (the Decoder is still created first, then the Encoder).
decoder, encoder = nvimgcodec.Decoder(), nvimgcodec.Encoder()
Load and decode Jpeg image with nvImageCodec
[4]:
# Read the whole encoded JPEG file into memory as bytes.
with open(resources_dir + "tabby_tiger_cat.jpg", 'rb') as in_file:
    data = in_file.read()

# decode() only needs the in-memory bytes, so the file is already closed here.
nv_img_cat = decoder.decode(data)
Save image to bmp file with nvImageCodec
[5]:
# Encode to an in-memory bmp stream first, so that a failing encode does not
# leave an empty/truncated output file behind.
data = encoder.encode(nv_img_cat, "bmp")

# Persist the encoded bytes.
with open("cat-jpg-o.bmp", 'wb') as out_file:
    out_file.write(data)
Read back with OpenCV just saved (with nvImageCodec) bmp image
[6]:
# cv2.imread output is converted BGR -> RGB before handing it to matplotlib.
cv_img_bmp = cv2.cvtColor(cv2.imread("cat-jpg-o.bmp"), cv2.COLOR_BGR2RGB)
plt.imshow(cv_img_bmp)
[6]:
<matplotlib.image.AxesImage at 0x7f71a6cc3d30>
Load and decode Jpeg2000 (in jp2 container) image with nvImageCodec in one read function
[7]:
# read() takes a file path and returns the decoded image in a single call.
nv_img = decoder.read(resources_dir + "cat-1046544_640.jp2")
Save image to jpg file with nvImageCodec in one write function
[8]:
# write() encodes and saves in one call; the cell output shows that it returns the file name.
encoder.write("cat-jp2-o.jpg", nv_img)
[8]:
'cat-jp2-o.jpg'
Read back with OpenCV the jpg image just saved with nvImageCodec
[9]:
# Load the file written by nvImageCodec; convert BGR -> RGB for matplotlib.
image = cv2.cvtColor(cv2.imread("cat-jp2-o.jpg"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[9]:
<matplotlib.image.AxesImage at 0x7f718a705810>
Load jpg with nvImageCodec
[10]:
# Decode the JPEG straight from its file path.
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")
Save as Jpeg 2000 with nvImageCodec
[11]:
# Save the decoded image to a .j2k file ...
encoder.write("cat-jpg-o.j2k", nv_img_jpg)
# ... and additionally encode it in memory, naming the codec explicitly.
cs = encoder.encode(nv_img_jpg, codec=".j2k")
Read back with OpenCV just saved (with nvImageCodec) j2k image
[12]:
# Load the .j2k file with OpenCV; convert BGR -> RGB for matplotlib.
image = cv2.cvtColor(cv2.imread("cat-jpg-o.j2k"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[12]:
<matplotlib.image.AxesImage at 0x7f718a773550>
Passing decoding parameters#
Decode jpeg with Exif orientation - by default it applies exif orientation
[13]:
# Decode with default parameters, then round-trip through a bmp file so that
# the result can be displayed via OpenCV + matplotlib.
nv_img_jpg = decoder.read(resources_dir + "f-exif-8.jpg")
encoder.write("f-exif-8.bmp", nv_img_jpg)

image = cv2.cvtColor(cv2.imread("f-exif-8.bmp"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[13]:
<matplotlib.image.AxesImage at 0x7f718a634280>
Let's assume we would like to ignore the Exif orientation
[14]:
# Same file as before, but decoded with apply_exif_orientation switched off.
dec_params = nvimgcodec.DecodeParams(apply_exif_orientation=False)
nv_img_jpg = decoder.read(
    resources_dir + "f-exif-8.jpg",
    params=dec_params,
)
encoder.write("f-wo-exif.bmp", nv_img_jpg)

image = cv2.cvtColor(cv2.imread("f-wo-exif.bmp"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[14]:
<matplotlib.image.AxesImage at 0x7f718a68ee90>
Passing encoding parameters#
Changing quality and chroma subsampling in jpeg
[15]:
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Quality value 5 combined with gray chroma subsampling.
enc_params = nvimgcodec.EncodeParams(
    quality_type=nvimgcodec.QualityType.QUALITY,
    quality_value=5,
    chroma_subsampling=nvimgcodec.ChromaSubsampling.CSS_GRAY,
)
encoder.write("cat-q5-gray.jpg", nv_img_jpg, params=enc_params)

image = cv2.cvtColor(cv2.imread("cat-q5-gray.jpg"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
[15]:
<matplotlib.image.AxesImage at 0x7f718811c6a0>
Jpeg optimized huffman and progressive encoding
[16]:
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Baseline file: quality 75 with no JPEG-specific options.
encoder.write(
    "cat-q75.jpg",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(
        quality_type=nvimgcodec.QualityType.QUALITY,
        quality_value=75,
    ),
)

# Same quality, with optimized Huffman tables and progressive encoding enabled.
encoder.write(
    "cat-q75-optimized_huffman.jpg",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(
        quality_type=nvimgcodec.QualityType.QUALITY,
        quality_value=75,
        jpeg_encode_params=nvimgcodec.JpegEncodeParams(optimized_huffman=True, progressive=True),
    ),
)

# Compare the sizes of the two files on disk.
print("default huffman file size:", os.path.getsize("cat-q75.jpg"))
print("optimized huffman file size:", os.path.getsize("cat-q75-optimized_huffman.jpg"))

image = cv2.cvtColor(cv2.imread("cat-q75-optimized_huffman.jpg"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
default huffman file size: 69729
optimized huffman file size: 66722
[16]:
<matplotlib.image.AxesImage at 0x7f71881b6410>
Jpeg2000 encode options overview
[17]:
# Source image shared by all JPEG 2000 variants written in this cell.
nv_img_jpg = decoder.read(resources_dir + "tabby_tiger_cat.jpg")

# Lossy variant: quality expressed as a PSNR value.
encoder.write(
    "cat-psnr25.j2k",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(quality_type=nvimgcodec.QualityType.PSNR, quality_value=25),
)

# Lossless variant with no JPEG 2000 specific options.
encoder.write(
    "cat-lossless.j2k",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(quality_type=nvimgcodec.QualityType.LOSSLESS),
)

# JPEG 2000 specific options, configured attribute by attribute.
jpeg2k_encode_params = nvimgcodec.Jpeg2kEncodeParams()
jpeg2k_encode_params.num_resolutions = 2
jpeg2k_encode_params.code_block_size = (32, 32)
jpeg2k_encode_params.bitstream_type = nvimgcodec.Jpeg2kBitstreamType.JP2
jpeg2k_encode_params.prog_order = nvimgcodec.Jpeg2kProgOrder.LRCP
jpeg2k_encode_params.mct_mode = 1

# Lossless variant using the options above.
encoder.write(
    "cat-lossless-2decomps.j2k",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(
        quality_type=nvimgcodec.QualityType.LOSSLESS,
        jpeg2k_encode_params=jpeg2k_encode_params,
    ),
)

# Turn on `ht` only after the previous file has been written, then encode again.
jpeg2k_encode_params.ht = True
encoder.write(
    "cat-lossless-2decomps-ht.j2k",
    nv_img_jpg,
    params=nvimgcodec.EncodeParams(
        quality_type=nvimgcodec.QualityType.LOSSLESS,
        jpeg2k_encode_params=jpeg2k_encode_params,
    ),
)

# Report the size on disk of each variant.
print("lossy file size:", os.path.getsize("cat-psnr25.j2k"))
print("lossless file size:", os.path.getsize("cat-lossless.j2k"))
print("lossless 2 decomposition levels file size:",
      os.path.getsize("cat-lossless-2decomps.j2k"))
print("lossless 2 decomposition levels with High-Throughput file size:",
      os.path.getsize("cat-lossless-2decomps-ht.j2k"))

# Display the lossy variant.
image = cv2.cvtColor(cv2.imread("cat-psnr25.j2k"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
lossy file size: 2467
lossless file size: 598108
lossless 2 decomposition levels file size: 424868
lossless 2 decomposition levels with High-Throughput file size: 609293
[17]:
<matplotlib.image.AxesImage at 0x7f718a6521a0>
We can specify allowed backends used for decoding
[18]:
# Decoder restricted to the GPU-only backend (given as a Backend object with
# load_hint=0.5) and the hybrid CPU/GPU backend (given as a bare BackendKind).
gpu_dec = nvimgcodec.Decoder(
    backends=[
        nvimgcodec.Backend(nvimgcodec.BackendKind.GPU_ONLY, load_hint=0.5),
        nvimgcodec.BackendKind.HYBRID_CPU_GPU,
    ]
)

# Decoder restricted to the CPU-only backend.
cpu_dec = nvimgcodec.Decoder(
    backends=[nvimgcodec.BackendKind.CPU_ONLY]
)
[19]:
%%time
# Timed decode using the decoder created with the CPU_ONLY backend.
nv_img_j2k = cpu_dec.read(resources_dir + "cat-1046544_640.jp2")
CPU times: user 96.4 ms, sys: 668 μs, total: 97.1 ms
Wall time: 96.8 ms
[20]:
%%time
# Timed decode using the decoder created with the GPU_ONLY / HYBRID_CPU_GPU backends.
# This overwrites nv_img_j2k, which later cells reuse.
nv_img_j2k = gpu_dec.read(resources_dir + "cat-1046544_640.jp2")
CPU times: user 27.2 ms, sys: 0 ns, total: 27.2 ms
Wall time: 26.5 ms
The same way we can create Encoder with allowed backends.
[21]:
# Encoder restricted to the GPU-only (load_hint=0.5) and hybrid CPU/GPU backends,
# both given as Backend objects.
gpu_enc = nvimgcodec.Encoder(
    backends=[
        nvimgcodec.Backend(nvimgcodec.BackendKind.GPU_ONLY, load_hint=0.5),
        nvimgcodec.Backend(nvimgcodec.BackendKind.HYBRID_CPU_GPU),
    ]
)

# Encoder restricted to the CPU-only backend.
cpu_enc = nvimgcodec.Encoder(
    backends=[nvimgcodec.BackendKind.CPU_ONLY]
)
[22]:
# Encode the previously decoded image with the CPU-only encoder.
cpu_enc.write("cat_cpu_out.jpg", nv_img_j2k)
[22]:
'cat_cpu_out.jpg'
[23]:
# Encode the same image with the encoder restricted to the GPU / hybrid backends.
gpu_enc.write("cat_gpu_out.jpg", nv_img_j2k)
[23]:
'cat_gpu_out.jpg'
Support of __cuda_array_interface__#
[24]:
# Show the __cuda_array_interface__ dictionary exposed by the decoded image,
# followed by the image shape.
print(nv_img_j2k.__cuda_array_interface__)
print(nv_img_j2k.shape)
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (13425796096, False), 'version': 3, 'stream': 1}
(475, 640, 3)
Support of __array_interface__#
A host ndarray can be passed to nvImageCodec through any object that supports __array_interface__, for example an image (numpy.ndarray) created by OpenCV
[25]:
# Load a bmp with OpenCV and convert it from BGR to RGB.
cv_img = cv2.cvtColor(cv2.imread(resources_dir + "Weimaraner.bmp"), cv2.COLOR_BGR2RGB)
print(type(cv_img))
print(cv_img.__array_interface__)

# Wrap the numpy array as an nvImageCodec image and encode it.
nv_h_img = nvimgcodec.as_image(cv_img)
gpu_enc.write("Weimaraner_ai_out.jpg", nv_h_img)

# Read the encoded file back for display.
image = cv2.cvtColor(cv2.imread("Weimaraner_ai_out.jpg"), cv2.COLOR_BGR2RGB)
plt.imshow(image)
<class 'numpy.ndarray'>
{'data': (118512496, False), 'strides': None, 'descr': [('', '|u1')], 'typestr': '|u1', 'shape': (720, 720, 3), 'version': 3}
[25]:
<matplotlib.image.AxesImage at 0x7f71647d0610>
The cpu() method of an Image object creates a new Image whose content is copied to a host buffer.
[26]:
# Decode with the CPU-only decoder, then call cpu() to obtain the host-side Image.
nv_img = cpu_dec.read(resources_dir + "cat-1046544_640.jp2")
nv_h_img = nv_img.cpu()
Image with host buffer supports __array_interface__ (but can’t return a proper __cuda_array_interface__)
[27]:
# Print both interface dictionaries of the host image for comparison.
print(nv_h_img.__array_interface__)
print(nv_h_img.__cuda_array_interface__)
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (140123238825984, False), 'version': 3}
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (140123238825984, False), 'version': 3, 'stream': 1}
so we can pass such Image to functions which accept and can use this interface like imshow from matplotlib library
[28]:
# The host Image is handed to matplotlib directly, with no explicit numpy conversion.
plt.imshow(nv_h_img)
[28]:
<matplotlib.image.AxesImage at 0x7f716461b820>
We can also create a zero-copy view of this image with numpy and process it with OpenCV
[29]:
# View the host image as a numpy array and filter it with a 5x5 averaging kernel.
np_img = np.asarray(nv_h_img)
kernel = np.ones((5, 5), np.float32) / 25
dst = cv2.filter2D(np_img, -1, kernel)

# Left panel: the unfiltered image.
plt.subplot(121)
plt.imshow(np_img)
plt.title('Original')
plt.xticks([])
plt.yticks([])

# Right panel: the filtered image.
plt.subplot(122)
plt.imshow(dst)
plt.title('Averaging')
plt.xticks([])
plt.yticks([])

plt.show()
There is also method cuda() which can be used to convert an Image with a host buffer to an Image with copied contents to a device buffer.
[30]:
# Print the interface of the host image, create a device Image with cuda(),
# and print the interface again to compare the 'data' entries.
print(nv_h_img.__cuda_array_interface__)
nv_new_cuda_img = nv_h_img.cuda()
print(nv_new_cuda_img.__cuda_array_interface__)
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (140123238825984, False), 'version': 3, 'stream': 1}
{'shape': (475, 640, 3), 'strides': None, 'typestr': '|u1', 'data': (13423328256, False), 'version': 3, 'stream': 1}
We can check whether Image keeps a host or a device buffer by reading the buffer_kind property
[31]:
# buffer_kind is read on the host image and on the image returned by cuda().
print("Host image buffer kind: ", nv_h_img.buffer_kind)
print("Device image buffer kind: ", nv_new_cuda_img.buffer_kind)
Host image buffer kind: ImageBufferKind.STRIDED_HOST
Device image buffer kind: ImageBufferKind.STRIDED_DEVICE
Managing lifetime of decoder resources using “with” statement
[32]:
# The Decoder is used as a context manager, so it only lives for the duration
# of the `with` block. Both the decode and the display happen inside the block.
with nvimgcodec.Decoder() as decoder_2:
    nv_img = decoder_2.read(resources_dir + "cat-1046544_640.jp2")
    plt.imshow(nv_img.cpu())
Similarly for encoder resources
[33]:
# The Encoder is used as a context manager in the same way as the Decoder.
with nvimgcodec.Encoder() as encoder_2:
    encoder_2.write("cat-1046544_640_out.jpg", nv_img)
    # Read the written file back with OpenCV (BGR -> RGB) and display it.
    image = cv2.imread("cat-1046544_640_out.jpg")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.imshow(image)
Parsing image information without decoding#
nvImageCodec allows parsing image information without having to decode the image. For this, we use the CodeStream entity.
[34]:
# Build a CodeStream from a file path and print its textual summary.
stream = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(stream)
CodeStream( codec_name=jpeg2k num_images=1 height=475 width=640 num_channels=3 dtype=uint8 precision=8 color_spec=1 sample_format=7 size=558133 capacity=558133 num_tiles_y=1 num_tiles_x=1 tile_height=475 tile_width=640)
A CodeStream object can also be created from an in-memory encoded stream (bytes or numpy array)
[35]:
# Load the encoded file into memory as bytes.
with open(resources_dir + "cat-1046544_640.jp2", 'rb') as in_file:
    data = in_file.read()

# Build the CodeStream from the in-memory bytes instead of a file path.
stream = nvimgcodec.CodeStream(data)
print(stream)
CodeStream( codec_name=jpeg2k num_images=1 height=475 width=640 num_channels=3 dtype=uint8 precision=8 color_spec=1 sample_format=7 size=558133 capacity=558133 num_tiles_y=1 num_tiles_x=1 tile_height=475 tile_width=640)
[36]:
# NOTE(review): height * width * num_channels counts individual samples (channel
# values); the pixel count alone would be height * width. The message text is
# kept as-is here so it still matches the recorded output.
print(
    f"Image has dimensions {stream.height}x{stream.width}x{stream.num_channels} "
    f"({stream.height * stream.width * stream.num_channels} total number of pixels) "
    f"structured in {stream.tile_height}x{stream.tile_width} tiles"
)
Image has dimensions 475x640x3 (912000 total number of pixels) structured in 475x640 tiles
Inspect color specification properties for information about the color space of images.
[37]:
# Parse three differently encoded files and report the color spec of each one.
jpeg_stream = nvimgcodec.CodeStream(resources_dir + "tabby_tiger_cat.jpg")
print(f"JPEG color spec: {jpeg_stream.color_spec}")

jp2_stream_rgb = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"JPEG2000 with sRGB color spec: {jp2_stream_rgb.color_spec}")

# This stream comes from a .tiff file, so it is named accordingly
# (it was previously called jp2_stream_ycbcr).
tiff_stream_ycbcr = nvimgcodec.CodeStream(resources_dir + "cat-300572_640_ycbcr.tiff")
print(f"Tiff with YCbCr color spec: {tiff_stream_ycbcr.color_spec}")
JPEG color spec: ColorSpec.SYCC
JPEG2000 with sRGB color spec: ColorSpec.SRGB
Tiff with YCbCr color spec: ColorSpec.SYCC
Using Color Spec when decoding#
The color_spec parameter can be used in DecodeParams to control how images are processed during decoding.
[38]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Decode with sRGB (default behavior) - equivalent to passing
# params=nvimgcodec.DecodeParams(color_spec = nvimgcodec.ColorSpec.SRGB)
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")

# Same code stream, this time requesting a grayscale result.
img_gray = decoder.decode(
    cs,
    params=nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.GRAY),
)
print(f"Grayscale decode color spec: {img_gray.color_spec}")

# Side-by-side visual comparison of the two decodes.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].imshow(img_srgb.cpu())
axes[1].imshow(img_gray.cpu(), cmap='gray')
for ax, title in zip(axes, ("Decoded as sRGB (default)", "Decoded as Grayscale")):
    ax.set_title(title)
    ax.axis('off')
plt.tight_layout()
plt.show()
Color spec of the source: ColorSpec.SRGB
sRGB decode color spec: ColorSpec.SRGB
Grayscale decode color spec: ColorSpec.GRAY
Decoding with UNCHANGED color spec (preserve original color space). Please note that in case of GRAY input when decoding with UNCHANGED color space, shape is different (has only one channel)
[39]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-111793_640-16bit-gray.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Default decode (no params given).
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")
print(f"Default sRGB decoded shape: {img_srgb.shape}")

# Decode again, asking for the color spec to be left UNCHANGED.
img_unchanged = decoder.decode(
    cs,
    params=nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.UNCHANGED),
)
print(f"Unchanged decode color spec: {img_unchanged.color_spec}")
print(f"Unchanged decoded shape: {img_unchanged.shape}")
Color spec of the source: ColorSpec.GRAY
sRGB decode color spec: ColorSpec.SRGB
Default sRGB decoded shape: (426, 640, 3)
Unchanged decode color spec: ColorSpec.GRAY
Unchanged decoded shape: (426, 640, 1)
For RGB input, decoding with UNCHANGED color spec should not change anything.
[40]:
cs = nvimgcodec.CodeStream(resources_dir + "cat-1046544_640.jp2")
print(f"Color spec of the source: {cs.color_spec}")

# Default decode (no params given).
img_srgb = decoder.decode(cs)
print(f"sRGB decode color spec: {img_srgb.color_spec}")
print(f"Default sRGB decoded shape: {img_srgb.shape}")

# Decode again, asking for the color spec to be left UNCHANGED.
img_unchanged = decoder.decode(
    cs,
    params=nvimgcodec.DecodeParams(color_spec=nvimgcodec.ColorSpec.UNCHANGED),
)
print(f"Unchanged decode color spec: {img_unchanged.color_spec}")
print(f"Unchanged decoded shape: {img_unchanged.shape}")
Color spec of the source: ColorSpec.SRGB
sRGB decode color spec: ColorSpec.SRGB
Default sRGB decoded shape: (475, 640, 3)
Unchanged decode color spec: ColorSpec.SRGB
Unchanged decoded shape: (475, 640, 3)
Encode Jpeg2000 with tiles#
[41]:
# JPEG 2000 specific options for the tiled encode.
jpeg2k_encode_params = nvimgcodec.Jpeg2kEncodeParams()
jpeg2k_encode_params.num_resolutions = 5
jpeg2k_encode_params.code_block_size = (64, 64)
jpeg2k_encode_params.bitstream_type = nvimgcodec.Jpeg2kBitstreamType.JP2
jpeg2k_encode_params.prog_order = nvimgcodec.Jpeg2kProgOrder.RPCL

# Requested tile size: 256x256.
tile_width = 256
tile_height = 256

# Lossless, tiled encode of nv_img_jpg, which was decoded in an earlier cell.
tiled_params = nvimgcodec.EncodeParams(
    quality_type=nvimgcodec.QualityType.LOSSLESS,
    jpeg2k_encode_params=jpeg2k_encode_params,
    tile_width=tile_width,
    tile_height=tile_height,
)
encoder.write("cat-tiled-256x256.j2k", nv_img_jpg, params=tiled_params)

# Parse the written file (without decoding it) and report its tile layout.
code_stream = nvimgcodec.CodeStream("cat-tiled-256x256.j2k")
print(f"Tile width: {code_stream.tile_width}")
print(f"Tile height: {code_stream.tile_height}")
print(f"Number of tiles (X): {code_stream.num_tiles_x}")
print(f"Number of tiles (Y): {code_stream.num_tiles_y}")

# Decode the tiled file and display the result.
image_tiled = decoder.decode(code_stream)
plt.imshow(image_tiled.cpu())
plt.title(f"JPEG2000 Tiled ({tile_width}x{tile_height})")
plt.show()
Tile width: 256
Tile height: 256
Number of tiles (X): 3
Number of tiles (Y): 3