nvCOMP Python API Basics#

[1]:
import numpy as np
import cupy as cp

Download example files

[2]:
import urllib.request
from pathlib import Path

# Download the two public-domain example texts used throughout this notebook.
# Skip the download when the file already exists so re-running the notebook
# (Restart Kernel -> Run All) does not hit the network again.
for url, filename in [
    ("http://textfiles.com/etext/NONFICTION/locke-essay-113.txt", "locke-essay-113.txt"),
    ("http://textfiles.com/etext/FICTION/mobydick.txt", "mobydick.txt"),
]:
    if not Path(filename).exists():
        urllib.request.urlretrieve(url, filename)
[2]:
('mobydick.txt', <http.client.HTTPMessage at 0x7f9022b42ef0>)

Import the nvcomp Python module and check versions#

[3]:
# Import the nvCOMP Python bindings and record the library and CUDA
# versions this notebook was executed against (nvcomp 4.0.0 / CUDA 12030).
from nvidia import nvcomp
print("nvcomp version:", nvcomp.__version__)
print("nvcomp cuda version:", nvcomp.__cuda_version__)
nvcomp version: 4.0.0
nvcomp cuda version: 12030

Zero-copy import host array#

[4]:
# Wrap a host-side NumPy array with nvcomp without copying:
# as_array() adopts the existing buffer (same data pointer, see next cell).
ascending = np.arange(0, 4096, dtype=np.int32)
nvarr_h = nvcomp.as_array(ascending)
[5]:
# Confirm the nvcomp Array aliases the NumPy buffer (identical data
# pointer in both interface dicts) and dump its metadata attributes.
print(ascending.__array_interface__)
print(nvarr_h.__array_interface__)
print(nvarr_h.__cuda_array_interface__)
for attr_name in ("buffer_size", "buffer_kind", "ndim", "dtype",
                  "shape", "strides", "item_size", "size"):
    print(getattr(nvarr_h, attr_name))

{'data': (94335900832880, False), 'strides': None, 'descr': [('', '<i4')], 'typestr': '<i4', 'shape': (4096,), 'version': 3}
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (94335900832880, False), 'version': 3}
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (94335900832880, False), 'version': 3, 'stream': 1}
16384
ArrayBufferKind.STRIDED_HOST
1
int32
(4096,)
(4,)
4
4096

Zero-copy import device array#

[6]:
# Wrap an existing CuPy (device) array without copying and inspect it;
# note buffer_kind reports STRIDED_DEVICE rather than STRIDED_HOST.
data_gpu = cp.array(ascending)
nvarr_d = nvcomp.as_array(data_gpu)
print(data_gpu.__cuda_array_interface__)
print(nvarr_d.__cuda_array_interface__)
for attr_name in ("buffer_kind", "ndim", "dtype", "shape",
                  "strides", "item_size", "size"):
    print(getattr(nvarr_d, attr_name))
{'shape': (4096,), 'typestr': '<i4', 'descr': [('', '<i4')], 'stream': 1, 'version': 3, 'strides': None, 'data': (34472984576, False)}
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (34472984576, False), 'version': 3, 'stream': 1}
ArrayBufferKind.STRIDED_DEVICE
1
int32
(4096,)
(4,)
4
4096

Convert host array to device array#

[7]:
# cuda() copies the host-backed array into device memory and returns a
# new device array (note the different data pointer in the output).
nvarr_d_cnv = nvarr_h.cuda()
print(nvarr_d_cnv.__cuda_array_interface__)
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (47244640256, False), 'version': 3, 'stream': 1}

Convert device array to host array#

[8]:
# cpu() copies the device array back to host memory; the result exposes
# __array_interface__ instead of __cuda_array_interface__.
nvarr_h_cnv = nvarr_d.cpu()
print(nvarr_h_cnv.__array_interface__)
{'shape': (4096,), 'strides': None, 'typestr': '<i4', 'data': (12960415744, False), 'version': 3}

Encode single array#

Read text file

[ ]:
with open('mobydick.txt', "rb") as f: text = f.read()

as_array supports the Python buffer protocol, so we can pass text directly to it

[ ]:
# A bytes object implements the buffer protocol, so as_array() can wrap
# the file contents directly — no intermediate NumPy array needed.
nvarr_txt_h = nvcomp.as_array(text)
print(nvarr_txt_h.__array_interface__)
{'shape': (1205404,), 'strides': None, 'typestr': '|u1', 'data': (94335916846944, False), 'version': 3}

Transfer to Device

[ ]:
# Stage the wrapped text on the GPU so encoding runs on device data.
nvarr_txt_d = nvarr_txt_h.cuda()
print(nvarr_txt_d.__cuda_array_interface__)
{'shape': (1205404,), 'strides': None, 'typestr': '|u1', 'data': (47244656640, False), 'version': 3, 'stream': 1}

Create Codec

[ ]:
lz4_codec = nvcomp.Codec(algorithm="LZ4")

Encode

[13]:
lz4_comp_arr = lz4_codec.encode(nvarr_txt_d)
[14]:
# The compressed result also lives in device memory (STRIDED_DEVICE).
print(lz4_comp_arr.__cuda_array_interface__)
print(lz4_comp_arr.buffer_kind)
{'shape': (824829,), 'strides': None, 'typestr': '|u1', 'data': (47248921600, False), 'version': 3, 'stream': 94335914071776}
ArrayBufferKind.STRIDED_DEVICE

Array supports the Python buffer protocol, so we can pass it directly to the write function

[15]:
with  open('mobydick.lz4', "wb") as f:  f.write(lz4_comp_arr.cpu())

Passing objects with standard interfaces directly to the encode function

[16]:
lz4_comp_arr = lz4_codec.encode(text)

Decode single array#

[17]:
nv_dec_array = lz4_codec.decode(lz4_comp_arr)
[18]:
# Decoded output is a device array of the original (uncompressed) length.
print(nv_dec_array.__cuda_array_interface__)
print(nv_dec_array.buffer_kind)
{'shape': (1205404,), 'strides': None, 'typestr': '|i1', 'data': (47253028864, False), 'version': 3, 'stream': 94335914071776}
ArrayBufferKind.STRIDED_DEVICE

Compare decoded array with original and print first 400 positions of decoded array

[19]:
# Round-trip check: decoded bytes must match the original text exactly;
# also preview the first 400 characters.
decoded_bytes = bytes(nv_dec_array.cpu())
print("Is decoded equal to original?", decoded_bytes == bytes(nvarr_txt_h))
print(decoded_bytes[:400].decode())
Is decoded equal to original? True
Preliminary Matter.

This text of Melville's Moby-Dick is based on the Hendricks House edition.
It was prepared by Professor Eugene F. Irey at the University of Colorado.
Any subsequent copies of this data must include this notice
and any publications resulting from analysis of this data must
include reference to Professor Irey's work.

Etymology  (Supplied by a late consumptive usher to a gra

Encode and decode with ANS codec, specified chunk size and checksum policy

[20]:
# ANS codec with an explicit chunk size and checksum computation +
# verification enabled.
ans_codec = nvcomp.Codec(
    algorithm="ANS",
    chunk_size=20,
    checksum_policy=nvcomp.ChecksumPolicy.COMPUTE_AND_VERIFY,
)
ans_comp_arr = ans_codec.encode(nvarr_d)

Define decode output type

[21]:
# decode() produces uint8 output by default; passing an array-interface
# typestr (e.g. '<u4') selects a different output dtype.
ans_deco_arr_uint8 = ans_codec.decode(ans_comp_arr)
ans_deco_arr_uint32 = ans_codec.decode(ans_comp_arr, '<u4')

print(ans_deco_arr_uint8.dtype)
print(ans_deco_arr_uint32.dtype)
uint8
uint32

Codec specific options#

[22]:
# GDeflate exposes a codec-specific algorithm_type option:
#   0 = high throughput, lower compression ratio (default)
#   1 = lower throughput, higher compression ratio
# Fix: the original cell mixed the spellings "GDeflate" and "Gdeflate";
# use one canonical spelling for both codecs.
gdeflate_ht_codec = nvcomp.Codec(algorithm="GDeflate", algorithm_type=0)
gdeflate_lt_codec = nvcomp.Codec(algorithm="GDeflate", algorithm_type=1)

[23]:
%%timeit
# Benchmark the high-throughput (algorithm_type=0) variant on a 4 KiB sample.
gdeflate_ht_comp_arr = gdeflate_ht_codec.encode(text[:4096])
300 µs ± 50.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
[24]:
%%timeit
# Benchmark the high-compression (algorithm_type=1) variant on the same sample.
gdeflate_lt_comp_arr = gdeflate_lt_codec.encode(text[:4096])
895 µs ± 24.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
[25]:
# Compare the compressed sizes of the two GDeflate variants on the same input.
gdeflate_ht_comp_arr = gdeflate_ht_codec.encode(text[:4096])
gdeflate_lt_comp_arr = gdeflate_lt_codec.encode(text[:4096])
print("high-throughput, low compression ratio (default) - compressed size:", gdeflate_ht_comp_arr.size)
print("low-throughput, high compression ratio - compressed size:", gdeflate_lt_comp_arr.size)
high-throughput, low compression ratio (default) - compressed size: 2664
low-throughput, high compression ratio - compressed size: 2520

Encode single array with multiple codecs#

[26]:
# Encode the same text with every available algorithm, report each
# compression ratio, and write the results to disk for the decode demo.
# Fixes vs original: variable typo "alogs" -> "algos" (matching the later
# cells), single filename expression, normalized whitespace.
print("Uncompressed size is", nvarr_txt_d.buffer_size)
algos = ["LZ4", "Snappy", "GDeflate", "Deflate", "Bitcomp", "ANS", "Zstd", "Cascaded"]
encoded_files = []
for algorithm in algos:
    codec = nvcomp.Codec(algorithm=algorithm)
    com_arr = codec.encode(nvarr_txt_d)
    ratio = com_arr.buffer_size / nvarr_txt_d.buffer_size
    print("Compressed size for", algorithm, "is", com_arr.buffer_size, "({:.1%})".format(ratio))
    file_name = f"mobydick.{algorithm}"
    with open(file_name, "wb") as f:
        f.write(com_arr.cpu())
    encoded_files.append(file_name)
Uncompressed size is 1205404
Compressed size for LZ4 is 824829 (68.4%)
Compressed size for Snappy is 863151 (71.6%)
Compressed size for GDeflate is 622632 (51.7%)
Compressed size for Deflate is 619051 (51.4%)
Compressed size for Bitcomp is 986776 (81.9%)
Compressed size for ANS is 737804 (61.2%)
Compressed size for Zstd is 540745 (44.9%)
Compressed size for Cascaded is 1205948 (100.0%)

Decoding single arrays of various formats#

[27]:
# Decode every file produced above with a single default Codec (no
# algorithm specified) and verify each round-trips to the original text.
codec = nvcomp.Codec()
for file_name in encoded_files:
    print("Decoding", file_name,)
    with open(file_name, "rb") as f: comp_bytes = f.read()
    nv_dec_d = codec.decode(comp_bytes) # since it supports buffer protocol we can pass comp_bytes directly
    print ("is equal to original? -", bytes(nv_dec_d.cpu()) ==  bytes(nvarr_txt_h))
Decoding mobydick.LZ4
is equal to original? - True
Decoding mobydick.Snappy
is equal to original? - True
Decoding mobydick.GDeflate
is equal to original? - True
Decoding mobydick.Deflate
is equal to original? - True
Decoding mobydick.Bitcomp
is equal to original? - True
Decoding mobydick.ANS
is equal to original? - True
Decoding mobydick.Zstd
is equal to original? - True
Decoding mobydick.Cascaded
is equal to original? - True

Encoding and decoding with various Bitstream Kinds#

[28]:
# For each algorithm, encode and decode with every BitstreamKind and
# report the compressed size; RAW / WITH_UNCOMPRESSED_SIZE drop the
# nvCOMP-native framing, so sizes differ slightly between kinds.
print("Uncompressed size is", nvarr_txt_d.buffer_size)
algos = ["LZ4", "Snappy", "Bitcomp", "ANS", "Zstd",  "Cascaded"]
bitstreams = [
    nvcomp.BitstreamKind.NVCOMP_NATIVE,
    nvcomp.BitstreamKind.RAW,
    nvcomp.BitstreamKind.WITH_UNCOMPRESSED_SIZE
]

for algorithm in algos:
    for bitstream_kind in bitstreams:
        codec = nvcomp.Codec(algorithm=algorithm, bitstream_kind=bitstream_kind)
        comp_arr = codec.encode(nvarr_txt_d)
        comp_ratio = comp_arr.buffer_size/nvarr_txt_d.buffer_size
        print("Compressed size for", algorithm, "with bitstream", bitstream_kind, "is", comp_arr.buffer_size, "({:.1%})".format(comp_ratio))
        decomp_array = codec.decode(comp_arr)
        print ("is equal to original? -", bytes(decomp_array.cpu()) ==  bytes(nvarr_txt_d.cpu()))
Uncompressed size is 1205404
Compressed size for LZ4 with bitstream BitstreamKind.NVCOMP_NATIVE is 824829 (68.4%)
is equal to original? - True
Compressed size for LZ4 with bitstream BitstreamKind.RAW is 807075 (67.0%)
is equal to original? - True
Compressed size for LZ4 with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 807079 (67.0%)
is equal to original? - True
Compressed size for Snappy with bitstream BitstreamKind.NVCOMP_NATIVE is 863143 (71.6%)
is equal to original? - True
Compressed size for Snappy with bitstream BitstreamKind.RAW is 854105 (70.9%)
is equal to original? - True
Compressed size for Snappy with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 854113 (70.9%)
is equal to original? - True
Compressed size for Bitcomp with bitstream BitstreamKind.NVCOMP_NATIVE is 986776 (81.9%)
is equal to original? - True
Compressed size for Bitcomp with bitstream BitstreamKind.RAW is 985800 (81.8%)
is equal to original? - True
Compressed size for Bitcomp with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 985808 (81.8%)
is equal to original? - True
Compressed size for ANS with bitstream BitstreamKind.NVCOMP_NATIVE is 737802 (61.2%)
is equal to original? - True
Compressed size for ANS with bitstream BitstreamKind.RAW is 680096 (56.4%)
is equal to original? - True
Compressed size for ANS with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 680104 (56.4%)
is equal to original? - True
Compressed size for Zstd with bitstream BitstreamKind.NVCOMP_NATIVE is 540745 (44.9%)
is equal to original? - True
Compressed size for Zstd with bitstream BitstreamKind.RAW is 527380 (43.8%)
is equal to original? - True
Compressed size for Zstd with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 527388 (43.8%)
is equal to original? - True
Compressed size for Cascaded with bitstream BitstreamKind.NVCOMP_NATIVE is 1205948 (100.0%)
is equal to original? - True
Compressed size for Cascaded with bitstream BitstreamKind.RAW is 1205412 (100.0%)
is equal to original? - True
Compressed size for Cascaded with bitstream BitstreamKind.WITH_UNCOMPRESSED_SIZE is 1205420 (100.0%)
is equal to original? - True

Batch encoding#

[29]:
# Read both sample files and stage each on the device for batch encoding.
nv_uncomp_arrays = []
for fn in ['mobydick.txt', 'locke-essay-113.txt']:
    with open(fn, "rb") as f:
        text = f.read()
    nv_uncomp_arrays.append(nvcomp.as_array(text).cuda())
[30]:
nv_comp_arrays = lz4_comp_arr = lz4_codec.encode(nv_uncomp_arrays)
[31]:
# Show the (uncompressed, compressed) interface dicts for each batch item.
for i in range(len(nv_uncomp_arrays)):
    print(nv_uncomp_arrays[i].__cuda_array_interface__)
    print(nv_comp_arrays[i].__cuda_array_interface__)
{'shape': (1205404,), 'strides': None, 'typestr': '|u1', 'data': (47647160320, False), 'version': 3, 'stream': 1}
{'shape': (824829,), 'strides': None, 'typestr': '|u1', 'data': (47625135104, False), 'version': 3, 'stream': 94335914071776}
{'shape': (1605768,), 'strides': None, 'typestr': '|u1', 'data': (47648366080, False), 'version': 3, 'stream': 1}
{'shape': (978812,), 'strides': None, 'typestr': '|u1', 'data': (47247113728, False), 'version': 3, 'stream': 94335914071776}

Batch decoding#

[32]:
nv_dec_arrays = lz4_codec.decode(nv_comp_arrays)

Compare with original

[33]:
# Verify each decoded array matches its original and preview the text.
for original, decoded in zip(nv_uncomp_arrays, nv_dec_arrays):
    decoded_bytes = bytes(decoded.cpu())
    print("Is decoded equal to original?", bytes(original.cpu()) == decoded_bytes)
    print("\n", decoded_bytes[:400].decode())
Is decoded equal to original? True

 Preliminary Matter.

This text of Melville's Moby-Dick is based on the Hendricks House edition.
It was prepared by Professor Eugene F. Irey at the University of Colorado.
Any subsequent copies of this data must include this notice
and any publications resulting from analysis of this data must
include reference to Professor Irey's work.

Etymology  (Supplied by a late consumptive usher to a gra
Is decoded equal to original? True

                                       1690

                    AN ESSAY CONCERNING HUMAN UNDERSTANDING

                                 by John Locke

                       TO THE RIGHT HONOURABLE

            LORD THOMAS, EARL OF PEMBROKE AND MONTGOMERY,

                      BARRON HERBERT OF CARDIFF,

      LORD ROSS, OF KENDAL, PAR, FITZHUGH, MARMION, ST. QUINTIN,

          AND SHURLAND;

Batch decoding and encoding various formats#

[34]:
# Batch-encode and batch-decode both files with every algorithm /
# BitstreamKind combination, reporting per-file size, ratio, and a
# round-trip equality check.
algos = ["LZ4", "Snappy", "Bitcomp", "Cascaded", "Zstd", "ANS"]
bitstreams = [
    nvcomp.BitstreamKind.NVCOMP_NATIVE,
    nvcomp.BitstreamKind.RAW,
    nvcomp.BitstreamKind.WITH_UNCOMPRESSED_SIZE
]
for algorithm in algos:
    for bitstream_kind in bitstreams:
        print("Algorithm:", algorithm, "BitstreamKind:", bitstream_kind)
        codec = nvcomp.Codec(algorithm=algorithm, bitstream_kind=bitstream_kind)
        nv_comp_arrays = codec.encode(nv_uncomp_arrays)
        nv_dec_arrays = codec.decode(nv_comp_arrays)
        for i in range(len(nv_dec_arrays)):
            print(" - File #", i)
            print("   -- Uncompressed size:", nv_uncomp_arrays[i].buffer_size)
            print("   -- Compressed size:", nv_comp_arrays[i].buffer_size, "({:.1%})".format(nv_comp_arrays[i].buffer_size/nv_uncomp_arrays[i].buffer_size) )
            print("   -- Is decoded equal to original?",  bytes(nv_uncomp_arrays[i].cpu()) == bytes(nv_dec_arrays[i].cpu()))

Algorithm: LZ4 BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 824829 (68.4%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 978812 (61.0%)
   -- Is decoded equal to original? True
Algorithm: LZ4 BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 807075 (67.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 964181 (60.0%)
   -- Is decoded equal to original? True
Algorithm: LZ4 BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 807079 (67.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 964185 (60.0%)
   -- Is decoded equal to original? True
Algorithm: Snappy BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 863151 (71.6%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 960232 (59.8%)
   -- Is decoded equal to original? True
Algorithm: Snappy BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 854105 (70.9%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 945120 (58.9%)
   -- Is decoded equal to original? True
Algorithm: Snappy BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 854113 (70.9%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 945128 (58.9%)
   -- Is decoded equal to original? True
Algorithm: Bitcomp BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 986776 (81.9%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1300340 (81.0%)
   -- Is decoded equal to original? True
Algorithm: Bitcomp BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 985800 (81.8%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1299060 (80.9%)
   -- Is decoded equal to original? True
Algorithm: Bitcomp BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 985808 (81.8%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1299068 (80.9%)
   -- Is decoded equal to original? True
Algorithm: Cascaded BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 1205948 (100.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1606456 (100.0%)
   -- Is decoded equal to original? True
Algorithm: Cascaded BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 1205412 (100.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1605776 (100.0%)
   -- Is decoded equal to original? True
Algorithm: Cascaded BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 1205420 (100.0%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 1605784 (100.0%)
   -- Is decoded equal to original? True
Algorithm: Zstd BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 540745 (44.9%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 613497 (38.2%)
   -- Is decoded equal to original? True
Algorithm: Zstd BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 527380 (43.8%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 596140 (37.1%)
   -- Is decoded equal to original? True
Algorithm: Zstd BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 527388 (43.8%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 596148 (37.1%)
   -- Is decoded equal to original? True
Algorithm: ANS BitstreamKind: BitstreamKind.NVCOMP_NATIVE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 737804 (61.2%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 957184 (59.6%)
   -- Is decoded equal to original? True
Algorithm: ANS BitstreamKind: BitstreamKind.RAW
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 680094 (56.4%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 879728 (54.8%)
   -- Is decoded equal to original? True
Algorithm: ANS BitstreamKind: BitstreamKind.WITH_UNCOMPRESSED_SIZE
 - File # 0
   -- Uncompressed size: 1205404
   -- Compressed size: 680102 (56.4%)
   -- Is decoded equal to original? True
 - File # 1
   -- Uncompressed size: 1605768
   -- Compressed size: 879736 (54.8%)
   -- Is decoded equal to original? True

Checksum example#

Checksums can be used only with the nvcomp.BitstreamKind.NVCOMP_NATIVE bitstream kind. They are computed before the data is compressed and after it is decompressed, and validate that the decompressed data is equal to the original.

Checksums can be used to prevent silent corruption, which can happen when data is corrupted but decoding finishes without errors, or when there is a bug in the encoding or decoding implementation.

The following example shows how to use them. Create a codec which computes and verifies checksums:

[35]:
# Codec that computes a checksum of the input during encode and verifies
# it after decode (checksums require the NVCOMP_NATIVE bitstream kind).
codec = nvcomp.Codec(
    algorithm="GDeflate",
    bitstream_kind=nvcomp.BitstreamKind.NVCOMP_NATIVE,
    checksum_policy=nvcomp.ChecksumPolicy.COMPUTE_AND_VERIFY,
)
nv_comp_arrays = codec.encode(nv_uncomp_arrays)

Verify that there are no errors when data is correct:

[36]:
# With uncorrupted data, checksum verification passes silently.
nv_dec_arrays = codec.decode(nv_comp_arrays)
for i, decoded in enumerate(nv_dec_arrays):
    print(f"Is array {i} equal to original? -",
          bytes(decoded.cpu()) == bytes(nv_uncomp_arrays[i].cpu()))
Is array 0 equal to original? - True
Is array 1 equal to original? - True

Introduce artificial error in data and decode

[37]:
# Simulate corruption: flip some bits (XOR with 176) in one byte of the
# second compressed array, then decode the batch again.
array_with_error = 1

# Round-trip through CuPy to get a mutable view of the compressed bytes.
cupy_array = cp.asarray(nv_comp_arrays[array_with_error])
cupy_array[1000] = cupy_array[1000] ^ 176

nv_comp_arrays[array_with_error] = nvcomp.as_array(cupy_array)

# decode() itself does not raise here; the checksum is only verified on
# first access to the decoded data (see the next cell).
nv_dec_arrays = codec.decode(nv_comp_arrays)

Check which arrays were affected in decoding (it should be only one). Checksums are only validated during the first access to the data; any subsequent access skips that check.

[38]:
# First access to each decoded array triggers checksum verification: the
# corrupted array raises RuntimeError. A second access skips the check
# and exposes the (mismatching) data, so the rerun prints False.
for i in range(len(nv_dec_arrays)):
    try:
        print (f"Is array {i} equal to original? -", bytes(nv_dec_arrays[i].cpu()) ==  bytes(nv_uncomp_arrays[i].cpu()))
    except RuntimeError as err:
        print(f"error with decoding array {i}: {err}")
        print(f"Rerunning: Is array {i} equal to original? -", bytes(nv_dec_arrays[i].cpu()) ==  bytes(nv_uncomp_arrays[i].cpu()))
Is array 0 equal to original? - True
error with decoding array 1: Checksum doesn't match.
Rerunning: Is array 1 equal to original? - False