Brute Force

View as Markdown

Python module: cuvs.neighbors.brute_force

Index

1cdef class Index

Brute Force index object. This object stores the trained Brute Force index, which can be used to perform nearest neighbor searches.

Members

| Name | Kind |
| --- | --- |
| trained | property |

trained

1def trained(self)

build

@auto_sync_resources

1def build(dataset, metric="sqeuclidean", metric_arg=2.0, resources=None)

Build the Brute Force index from the dataset for efficient search.

Parameters

| Name | Type | Description |
| --- | --- | --- |
| dataset | CUDA array interface compliant matrix shape (n_samples, dim) | Supported dtype [float32, float16] |
| metric | | Distance metric to use. Default is sqeuclidean |
| metric_arg | | Value of 'p' for Minkowski distances |
| resources | cuvs.common.Resources, optional | |

Returns

| Name | Type | Description |
| --- | --- | --- |
| index | cuvs.neighbors.brute_force.Index | |

Examples

1>>> import cupy as cp
2>>> from cuvs.neighbors import brute_force
3>>> n_samples = 50000
4>>> n_features = 50
5>>> n_queries = 1000
6>>> k = 10
7>>> dataset = cp.random.random_sample((n_samples, n_features),
8... dtype=cp.float32)
9>>> index = brute_force.build(dataset, metric="cosine")
10>>> distances, neighbors = brute_force.search(index, dataset, k)
11>>> distances = cp.asarray(distances)
12>>> neighbors = cp.asarray(neighbors)

search

@auto_sync_resources
@auto_convert_output

1def search(Index index, queries, k, neighbors=None, distances=None, resources=None, prefilter=None)

Find the k nearest neighbors for each query.

Parameters

| Name | Type | Description |
| --- | --- | --- |
| index | Index | Trained Brute Force index. |
| queries | CUDA array interface compliant matrix shape (n_queries, dim) | Supported dtype [float32, float16] |
| k | int | The number of neighbors. |
| neighbors | Optional CUDA array interface compliant matrix shape (n_queries, k), dtype int64_t | If supplied, neighbor indices will be written here in-place. (default None) |
| distances | Optional CUDA array interface compliant matrix shape (n_queries, k) | If supplied, the distances to the neighbors will be written here in-place. (default None) |
| prefilter | Optional, cuvs.neighbors.cuvsFilter | An optional filter to exclude certain query-neighbor pairs using a bitmap or bitset. The filter function should have a row-major layout with logical shape (n_prefilter_rows, n_samples), where:<br>- n_prefilter_rows == n_queries when using a bitmap filter.<br>- n_prefilter_rows == 1 when using a bitset prefilter.<br>Each bit along the n_samples dimension determines whether queries[i] should be considered for distance computation with the corresponding sample in the index. (default None) |
| resources | cuvs.common.Resources, optional | |

Examples

1>>> # Example without pre-filter
2>>> import cupy as cp
3>>> from cuvs.neighbors import brute_force
4>>> n_samples = 50000
5>>> n_features = 50
6>>> n_queries = 1000
7>>> dataset = cp.random.random_sample((n_samples, n_features),
8... dtype=cp.float32)
9>>> # Build index
10>>> index = brute_force.build(dataset, metric="sqeuclidean")
11>>> # Search using the built index
12>>> queries = cp.random.random_sample((n_queries, n_features),
13... dtype=cp.float32)
14>>> k = 10
15>>> # Using a pooling allocator reduces overhead of temporary array
16>>> # creation during search. This is useful if multiple searches
17>>> # are performed with same query size.
18>>> distances, neighbors = brute_force.search(index, queries, k)
19>>> neighbors = cp.asarray(neighbors)
20>>> distances = cp.asarray(distances)
1>>> # Example with pre-filter
2>>> import numpy as np
3>>> import cupy as cp
4>>> from cuvs.neighbors import brute_force, filters
5>>> n_samples = 50000
6>>> n_features = 50
7>>> n_queries = 1000
8>>> dataset = cp.random.random_sample((n_samples, n_features),
9... dtype=cp.float32)
10>>> # Build index
11>>> index = brute_force.build(dataset, metric="sqeuclidean")
12>>> # Search using the built index
13>>> queries = cp.random.random_sample((n_queries, n_features),
14... dtype=cp.float32)
15>>> # Build filters
16>>> n_bitmap = np.ceil(n_samples * n_queries / 32).astype(int)
17>>> # Create your own bitmap as the filter by replacing the random one.
18>>> bitmap = cp.random.randint(1, 100, size=(n_bitmap,), dtype=cp.uint32)
19>>> bitmap_prefilter = filters.from_bitmap(bitmap)
20>>>
21>>> # or Build bitset prefilter:
22>>> # n_bitset = np.ceil(n_samples * 1 / 32).astype(int)
23>>> # # Create your own bitset as the filter by replacing the random one.
24>>> # bitset = cp.random.randint(1, 100, size=(n_bitset,), dtype=cp.uint32)
25>>> # bitset_prefilter = filters.from_bitset(bitset)
26>>>
27>>> k = 10
28>>> # Using a pooling allocator reduces overhead of temporary array
29>>> # creation during search. This is useful if multiple searches
30>>> # are performed with same query size.
31>>> distances, neighbors = brute_force.search(index, queries, k,
32... prefilter=bitmap_prefilter)
33>>> neighbors = cp.asarray(neighbors)
34>>> distances = cp.asarray(distances)

save

@auto_sync_resources

1def save(filename, Index index, bool include_dataset=True, resources=None)

Saves the index to a file.

The serialization format is subject to change; therefore, loading an index saved with a previous version of cuvs is not guaranteed to work.

Parameters

| Name | Type | Description |
| --- | --- | --- |
| filename | string | Name of the file. |
| index | Index | Trained Brute Force index. |
| resources | cuvs.common.Resources, optional | |

Examples

1>>> import cupy as cp
2>>> from cuvs.neighbors import brute_force
3>>> n_samples = 50000
4>>> n_features = 50
5>>> dataset = cp.random.random_sample((n_samples, n_features),
6... dtype=cp.float32)
7>>> # Build index
8>>> index = brute_force.build(dataset)
9>>> # Serialize and deserialize the brute_force index built
10>>> brute_force.save("my_index.bin", index)
11>>> index_loaded = brute_force.load("my_index.bin")

load

@auto_sync_resources

1def load(filename, resources=None)

Loads an index from a file.

The serialization format is subject to change; therefore, loading an index saved with a previous version of cuvs is not guaranteed to work.

Parameters

| Name | Type | Description |
| --- | --- | --- |
| filename | string | Name of the file. |
| resources | cuvs.common.Resources, optional | |

Returns

| Name | Type | Description |
| --- | --- | --- |
| index | Index | |

Examples

1>>> import cupy as cp
2>>> from cuvs.neighbors import brute_force
3>>> n_samples = 50000
4>>> n_features = 50
5>>> dataset = cp.random.random_sample((n_samples, n_features),
6... dtype=cp.float32)
7>>> # Build index
8>>> index = brute_force.build(dataset)
9>>> # Serialize and deserialize the brute_force index built
10>>> brute_force.save("my_index.bin", index)
11>>> index_loaded = brute_force.load("my_index.bin")