cuStateVec Ex API Reference#

This reference describes all components of the cuStateVec Ex API.

cuStateVec Ex data types#

Opaque data structures#

`custatevecExDictionaryDescriptor_t`#

typedef struct custatevecExDictionary *custatevecExDictionaryDescriptor_t#: This descriptor holds a handle to a dictionary instance.

`custatevecExCommunicatorDescriptor_t`#

typedef struct custatevecExCommunicator_t *custatevecExCommunicatorDescriptor_t#

This descriptor holds a handle to an inter-process communication object.

This abstraction enables communication between processes for multi-process state vector operations.

`custatevecExStateVectorDescriptor_t`#

typedef struct custatevecExStateVector *custatevecExStateVectorDescriptor_t#: This descriptor holds a handle to a state vector instance.

`custatevecExSVUpdaterDescriptor_t`#

typedef struct custatevecExSVUpdater *custatevecExSVUpdaterDescriptor_t#: This descriptor holds a handle to an SVUpdater instance.

`custatevecExResourceManagerDescriptor_t`#

typedef struct custatevecExResourceManager *custatevecExResourceManagerDescriptor_t#: This descriptor holds a handle to a resource manager instance.

Note

Custom resource manager is not supported in this release.

Enumerators#

`custatevecExCommunicatorStatus_t`#

enum custatevecExCommunicatorStatus_t#

Status code returned by communicator method functions.

This status code is returned by communicator functions defined in custatevecEx_ext.h. This enum only implements the success code. Other status codes are implementation dependent.

Values:

enumerator CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS = 0#: Operation completed successfully.

`custatevecExStateVectorCapability_t`#

enum custatevecExStateVectorCapability_t#

Bitmask that specifies state vector capability. This enum is reserved for future use.

Values:

enumerator CUSTATEVEC_EX_SV_CAPABILITY_NONE = 0#: No capability enabled

`custatevecExStateVectorDistributionType_t`#

enum custatevecExStateVectorDistributionType_t#

Enum that specifies the distribution type of state vector.

Values:

enumerator CUSTATEVEC_EX_SV_DISTRIBUTION_SINGLE_DEVICE = 0#: State vector on single device.

enumerator CUSTATEVEC_EX_SV_DISTRIBUTION_MULTI_DEVICE = 1#: State vector distributed to multiple devices.

enumerator CUSTATEVEC_EX_SV_DISTRIBUTION_MULTI_PROCESS = 2#: State vector distributed to multiple processes.

`custatevecExGlobalIndexBitClass_t`#

enum custatevecExGlobalIndexBitClass_t#

Communication method for global index bit operations in multi-process distributions.

Operations on global index bits require data transfers. This enum specifies the communication method to use.

Values:

enumerator CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P = 1#: Inter-process GPUDirect P2P.

enumerator CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_COMMUNICATOR = 2#: Communication via custatevecExCommunicator.

`custatevecExStateVectorProperty_t`#

enum custatevecExStateVectorProperty_t#

Specifies the name of state vector property.

Values:

enumerator CUSTATEVEC_EX_SV_PROP_DISTRIBUTION_TYPE = 0#: Returns custatevecExStateVectorDistributionType_t.

enumerator CUSTATEVEC_EX_SV_PROP_DATA_TYPE = 1#: Returns cudaDataType_t.

enumerator CUSTATEVEC_EX_SV_PROP_NUM_WIRES = 2#: Returns int32_t.

enumerator CUSTATEVEC_EX_SV_PROP_WIRE_ORDERING = 3#: Returns int32_t array.

enumerator CUSTATEVEC_EX_SV_PROP_NUM_LOCAL_WIRES = 4#: Returns int32_t.

enumerator CUSTATEVEC_EX_SV_PROP_NUM_DEVICE_SUBSVS = 5#: Returns int32_t.

enumerator CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES = 6#: Returns int32_t array.

`custatevecExPermutationType_t`#

enum custatevecExPermutationType_t#

Specifies the permutation type.

Values:

enumerator CUSTATEVEC_EX_PERMUTATION_SCATTER = 0#: Scatter permutation.

enumerator CUSTATEVEC_EX_PERMUTATION_GATHER = 1#: Gather permutation.

`custatevecExMatrixType_t`#

enum custatevecExMatrixType_t#

Specifies the type of matrix.

Values:

enumerator CUSTATEVEC_EX_MATRIX_DENSE = 1#: Dense matrix.

enumerator CUSTATEVEC_EX_MATRIX_DIAGONAL = 2#: Diagonal matrix.

enumerator CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL = 4#: Anti-diagonal matrix.

`custatevecExSVUpdaterConfigName_t`#

enum custatevecExSVUpdaterConfigName_t#

Specifies the configuration argument type of SVUpdater.

Values:

enumerator CUSTATEVEC_EX_SVUPDATER_CONFIG_MAX_NUM_HOST_THREADS = 0#: Number of host threads, int32_t.

enumerator CUSTATEVEC_EX_SVUPDATER_CONFIG_DENSE_FUSION_SIZE = 1#: Dense fusion size, int32_t.

enumerator CUSTATEVEC_EX_SVUPDATER_CONFIG_DIAGONAL_FUSION_SIZE = 2#: Diagonal fusion size, int32_t.

`custatevecExMemorySharingMethod_t`#

enum custatevecExMemorySharingMethod_t#

Specifies the method to share device virtual memory among processes.

Values:

enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_AUTODETECT = 0#: Auto-detect.

enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_NONE = 1#: No P2P memory sharing.

enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_FABRIC_HANDLE = 2#: Use FabricHandle.

enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_PIDFD = 3#: Use pidfd syscalls with POSIX file descriptor.

Structures#

`custatevecExSVUpdaterConfigItem_t`#

struct custatevecExSVUpdaterConfigItem_t#

Specifies the configuration item of SVUpdater.

Public Members

custatevecExSVUpdaterConfigName_t name#: Configuration name.

int32_t int32#: int32 value

char placeholder[32]#: Placeholder to keep 32 bytes for the value member.

union custatevecExSVUpdaterConfigItem_t::[anonymous] value#: Configuration value.

cuStateVec Ex functions#

Dictionary API#

`custatevecExDictionaryDestroy`#

custatevecStatus_t custatevecExDictionaryDestroy( custatevecExDictionaryDescriptor_t dictionary )#

Destroy dictionary instance.

custatevecExDictionaryDestroy() destroys a dictionary instance. A dictionary is an object that holds key-value pairs.

Parameters:: dictionary – [in] dictionary descriptor instance

Communicator API#

`custatevecExCommunicatorInitialize`#

custatevecStatus_t custatevecExCommunicatorInitialize(

custatevecCommunicatorType_t communicatorType,

const char *libraryPath,

int *argc,

char ***argv,

custatevecExCommunicatorStatus_t *exCommStatus

)#

Initialize inter-process communication.

The communicator is the abstraction of inter-process communication in cuStateVec Ex API. This function initializes the underlying inter-process communication library and prepares it for creating communicator instances via custatevecExCommunicatorCreate().

The library provides two built-in communicator implementations for Open MPI and MPICH. To use these, specify CUSTATEVEC_COMMUNICATOR_TYPE_OPENMPI or CUSTATEVEC_COMMUNICATOR_TYPE_MPICH for communicatorType. The API loads the MPI library from the path specified by libraryPath. If libraryPath is null, it defaults to “libmpi.so” and follows the standard library search order.

The exCommStatus argument returns the result of the call to the init() method of the underlying implementation. For built-in communicators, it returns the return value of MPI_Init() as custatevecExCommunicatorStatus_t.

The built-in implementation skips calling MPI_Init() if MPI is already initialized. In this case, CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS is returned to the exCommStatus argument.

To use other inter-process communication libraries, users can build a custom communicator. To use it, specify CUSTATEVEC_COMMUNICATOR_TYPE_EXTERNAL for communicatorType and provide the path to the custom communicator library in libraryPath. If libraryPath is null for external communicators, the API searches for the required symbols in the current process, allowing applications to provide their own communicator implementation without a separate shared library.

This API returns successfully if the specified shared object is properly loaded and the IPC library is properly initialized.

Note

This function is intended for application initialization. It should be called once per process before any other communicator operations.

Note

The communicator dynamically loads the library that provides inter-process communication features. During the lifetime of the application, only one library may be used. This API returns CUSTATEVEC_STATUS_ALREADY_INITIALIZED on successive calls after the first successful initialization. If an application directly links to an MPI library, the communicator should use the same library binary. Otherwise, this API may fail or the communicator may not function properly.

Parameters:

communicatorType – [in] communicator type
libraryPath – [in] path to the inter-process communication library (can be null)
argc – [inout] pointer to argument count
argv – [inout] pointer to argument vector
exCommStatus – [out] pointer to the variable that receives the status from communicator’s init() method

`custatevecExCommunicatorFinalize`#

custatevecStatus_t custatevecExCommunicatorFinalize( custatevecExCommunicatorStatus_t *exCommStatus )#

Finalize inter-process communication library.

This function finalizes the underlying inter-process communication library and releases all associated resources.

The exCommStatus argument returns the result of the call to the communicator provider’s finalize() method. For built-in communicators, it returns the return value of MPI_Finalize() as custatevecExCommunicatorStatus_t.

The built-in implementation skips calling MPI_Finalize() if MPI_Init() was not called by custatevecExCommunicatorInitialize(). In this case, CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS is returned.

Note

This function is intended for application finalization. All communicator instances must be destroyed before calling this function.

Parameters:: exCommStatus – [out] pointer to the variable that receives the status from communicator’s finalize() method

`custatevecExCommunicatorGetSizeAndRank`#

custatevecStatus_t custatevecExCommunicatorGetSizeAndRank( int32_t *size, int32_t *rank, custatevecExCommunicatorStatus_t *exCommStatus )#

Get the global size and rank.

This function retrieves the number of processes and the rank of the calling process. These values are identical to those obtained from MPI_COMM_WORLD for MPI-based communicator.

The size argument returns the global number of processes. The rank argument returns the global rank of the calling process (0-indexed, range [0, size)). The exCommStatus argument returns the status of the communicator operations.

custatevecExCommunicatorInitialize() must be called before calling this function. If the IPC library is not initialized, this function returns CUSTATEVEC_STATUS_NOT_INITIALIZED.

Parameters:

size – [out] pointer to the variable that receives the global number of processes
rank – [out] pointer to the variable that receives the global rank of the calling process
exCommStatus – [out] pointer to the variable that receives the status from communicator operations

`custatevecExCommunicatorCreate`#

custatevecStatus_t custatevecExCommunicatorCreate( custatevecExCommunicatorDescriptor_t *exCommunicator )#

Create communicator instance.

This function creates a communicator instance using the IPC library initialized by custatevecExCommunicatorInitialize().

custatevecExCommunicatorInitialize() must be called before calling this function. If the IPC library is not initialized, this function returns CUSTATEVEC_STATUS_NOT_INITIALIZED.

Parameters:: exCommunicator – [out] pointer to the variable that receives the created communicator instance

`custatevecExCommunicatorDestroy`#

custatevecStatus_t custatevecExCommunicatorDestroy( custatevecExCommunicatorDescriptor_t exCommunicator )#

Destroy communicator instance.

This function destroys a communicator instance and releases all associated resources.

Parameters:: exCommunicator – [in] communicator instance to destroy

StateVector API#

`custatevecExConfigureStateVectorSingleDevice`#

custatevecStatus_t custatevecExConfigureStateVectorSingleDevice( custatevecExDictionaryDescriptor_t *svConfig, cudaDataType_t svDataType, int32_t numWires, int32_t numDeviceWires, int32_t deviceId, uint32_t capability )#

Create configuration for single device state vector.

This function creates a dictionary that holds state vector configuration for a single device state vector according to the given set of arguments. The state vector will be allocated on the single device specified by deviceId.

The numWires argument specifies the number of wires of state vector, and the numDeviceWires argument specifies the number of wires allocated on the device.

The capability argument is to enable optional features. The value is specified as a bit-wise OR of custatevecExStateVectorCapability_t. As the present version does not have any capability defined, the value should be 0.

For the present release, the same value should be specified for the numWires and numDeviceWires arguments. The two separate arguments are declared to allow for a future extension.

Note

This function creates a logical configuration and does not validate actual system or hardware requirements (e.g., non-existent deviceId, required memory capacity). Validation occurs when calling custatevecExStateVectorCreateSingleProcess() to create the state vector instance.

Parameters:

svConfig – [out] dictionary instance that holds state vector configuration
svDataType – [in] state vector data type
numWires – [in] number of wires of state vector
numDeviceWires – [in] number of wires of state vector on device
deviceId – [in] device id where the entire state vector will be allocated
capability – [in] bit mask to specify optional features of state vector

`custatevecExConfigureStateVectorMultiDevice`#

custatevecStatus_t custatevecExConfigureStateVectorMultiDevice( custatevecExDictionaryDescriptor_t *svConfig, cudaDataType_t svDataType, int32_t numWires, int32_t numDeviceWires, const int32_t *deviceIds, int32_t numDevices, custatevecDeviceNetworkType_t networkType, uint32_t capability )#

Create configuration for multi-device state vector.

This function creates a dictionary that holds state vector configuration for multi-device state vector within a single process. The state vector is distributed across multiple devices specified by deviceIds. The specified devices should be able to communicate with each other by GPUDirect P2P.

The numDevices should be a power-of-two value. The following relationship should be satisfied:

numWires = log2(numDevices) + numDeviceWires

The networkType argument specifies the device interconnect topology. Use CUSTATEVEC_DEVICE_NETWORK_TYPE_SWITCH for GPUs connected by switches (e.g., via NVSwitch or PCIe switch) or CUSTATEVEC_DEVICE_NETWORK_TYPE_FULLMESH for direct all-to-all connectivity (e.g., direct NVLink mesh).

The capability argument enables optional features. The value should be 0 as no capabilities are currently defined.

Note

This function creates a logical configuration and does not validate actual system or hardware requirements (e.g., non-existent deviceIds, P2P capability, required memory capacity). Validation occurs when calling custatevecExStateVectorCreateSingleProcess() to create the state vector instance.

Parameters:

svConfig – [out] dictionary instance that holds state vector configuration
svDataType – [in] state vector data type
numWires – [in] number of wires of state vector
numDeviceWires – [in] number of wires of state vector on each device
deviceIds – [in] host pointer to an array of device ids
numDevices – [in] number of devices
networkType – [in] device network topology type
capability – [in] bit mask to specify optional features of state vector

`custatevecExConfigureStateVectorMultiProcess`#

custatevecStatus_t custatevecExConfigureStateVectorMultiProcess( custatevecExDictionaryDescriptor_t *svConfig, cudaDataType_t svDataType, int32_t numWires, int32_t numDeviceWires, int32_t deviceId, custatevecExMemorySharingMethod_t memorySharingMethod, const custatevecExGlobalIndexBitClass_t *globalIndexBitClasses, const int32_t *numGlobalIndexBitsPerLayer, int32_t numGlobalIndexBitLayers, size_t transferWorkspaceSizeInBytes, const void *auxConfig, uint32_t capability )#

Create configuration for multi-process distributed state vector.

This function creates a dictionary that holds state vector configuration for multi-process distributed state vector according to the given set of arguments. In this configuration, the state vector is distributed across multiple processes, with each process owning a single device to allocate one sub state vector.

The numWires argument specifies the number of wires of state vector, and the numDeviceWires argument specifies the number of wires allocated on each device. The following relationship should be satisfied where numProcesses is the number of processes to which state vector is distributed.

numWires = log2(numProcesses) + numDeviceWires

The deviceId argument specifies the device ID where the sub state vector will be allocated. If set to -1, the device ID will be dynamically assigned during state vector creation using the formula: deviceId = processRank % numDevicesInHardwareNode, where processRank is obtained from the communicator and numDevicesInHardwareNode is the total number of CUDA devices available in the hardware node.

The memorySharingMethod argument specifies the method for sharing GPU virtual device memory between processes by inter-process GPUDirect P2P. Virtual device memory is utilized to allocate the device memory for the state vector. If CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P is specified in globalIndexBitClasses, an available memory sharing method must be specified. Use CUSTATEVEC_EX_MEMORY_SHARING_METHOD_AUTODETECT for automatic selection, or specify a particular method when the suitable sharing method is known. CUSTATEVEC_EX_MEMORY_SHARING_METHOD_FABRIC_HANDLE is required to enable multi-node NVLink. CUSTATEVEC_EX_MEMORY_SHARING_METHOD_PIDFD uses POSIX file descriptors to export and import virtual device memory among processes. With this method, all GPUs that are assigned to the distributed processes should be in a single hardware node due to the limitation of POSIX file descriptor sharing.

If CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P is not specified in globalIndexBitClasses, use CUSTATEVEC_EX_MEMORY_SHARING_METHOD_NONE that indicates no memory sharing is required. If another memory sharing method is specified, it will be silently ignored.

The process-to-process communication is organized in layers that correspond to network hierarchy. Each layer represents a different communication datalink (e.g., PCIe Switch, NVLink with NVSwitch) between processes with appropriate communication methods.

The globalIndexBitClasses, numGlobalIndexBitsPerLayer, and numGlobalIndexBitLayers arguments specify the communication datalink in each network layer:

globalIndexBitClasses[i] specifies the communication class for layer i, which should be one of custatevecExGlobalIndexBitClass_t values (network topology types, P2P variants, or communicator).
numGlobalIndexBitsPerLayer[i] specifies the number of global index bits assigned to layer i.
numGlobalIndexBitLayers specifies the total number of layers. The max acceptable number is 8.

The sum of all elements in numGlobalIndexBitsPerLayer should equal log2(numProcesses).

The transferWorkspaceSizeInBytes argument specifies the size in bytes of the device buffer allocated for inter-process data transfers. This device buffer serves as workspace during communication between processes. The API may internally adjust the specified value to the closest power-of-two number that is smaller than or equal to the given value. Larger buffer sizes can improve performance depending on the system hardware configuration. If the specified size is too small for optimal performance, the API will automatically increase it to an appropriate value.

The auxConfig argument specifies auxiliary configuration setup. No auxiliary configuration is currently defined; this argument must be NULL.

Note

This function creates a logical configuration and does not validate actual system or hardware requirements (e.g., non-existent deviceId, memory sharing method availability, required memory capacity). Validation occurs when calling custatevecExStateVectorCreateMultiProcess() to create the state vector instance, which may return CUSTATEVEC_STATUS_INVALID_CONFIGURATION if the system does not support the requested configuration.

Parameters:

svConfig – [out] dictionary instance that holds state vector configuration
svDataType – [in] state vector data type
numWires – [in] number of wires of state vector
numDeviceWires – [in] number of wires of state vector on each device
deviceId – [in] device id for this process
memorySharingMethod – [in] method for sharing GPU virtual device memory between processes
globalIndexBitClasses – [in] host pointer to an array of global index bit classes for each layer
numGlobalIndexBitsPerLayer – [in] host pointer to an array specifying number of global index bits per layer
numGlobalIndexBitLayers – [in] number of global index bit layers
transferWorkspaceSizeInBytes – [in] size in bytes of workspace memory for inter-process data transfers
auxConfig – [in] pointer to auxiliary configuration
capability – [in] bit mask to specify optional features of state vector

`custatevecExStateVectorCreateSingleProcess`#

custatevecStatus_t custatevecExStateVectorCreateSingleProcess( custatevecExStateVectorDescriptor_t *stateVector, const custatevecExDictionaryDescriptor_t svConfig, cudaStream_t const *streams, int32_t numStreams, custatevecExResourceManagerDescriptor_t resourceManager )#

Create state vector.

This function creates a state vector instance according to the svConfig argument and returns the instance to the stateVector argument. The svConfig should be created by custatevecExConfigureStateVectorSingleDevice() or custatevecExConfigureStateVectorMultiDevice().

The streams and numStreams arguments specify CUDA streams. CUDA API calls and kernel launches are serialized on the given streams. The number of streams should match the number of devices where the state vector is allocated. All CUDA calls serialized on the streams are synchronized by calling custatevecExStateVectorSynchronize(). If a null pointer is passed to the streams argument, all calls are serialized on the default streams. In this case, the value of the numStreams argument should be 0.

The resourceManager argument specifies the resource manager. If a null pointer is specified, the library default resource manager is used.

This API returns CUSTATEVEC_STATUS_INVALID_CONFIGURATION if an argument specified by custatevecExConfigureStateVectorSingleDevice() or custatevecExConfigureStateVectorMultiDevice() is invalid, such as specifying device ID that does not exist.

When using multiple devices (multi-device state vector), the following requirements must be met:

All devices must be of the same GPU generation (same compute capability).
GPUDirect P2P must be available between all devices.

If these requirements are not met, this API returns CUSTATEVEC_STATUS_INVALID_CONFIGURATION.

This API returns CUSTATEVEC_STATUS_NOT_SUPPORTED if no CUDA-capable GPU devices are found in the system.

Note

Custom resource manager is not enabled in this release. Please pass nullptr to the resourceManager argument. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.

Parameters:

stateVector – [out] a host pointer to the variable that receives state vector instance
svConfig – [in] state vector configuration created by a state vector configuration function
streams – [in] a pointer to a host array that holds CUDA streams
numStreams – [in] the number of streams given by the streams argument
resourceManager – [in] resource manager

`custatevecExStateVectorCreateMultiProcess`#

custatevecStatus_t custatevecExStateVectorCreateMultiProcess( custatevecExStateVectorDescriptor_t *stateVector, const custatevecExDictionaryDescriptor_t svConfig, cudaStream_t stream, custatevecExCommunicatorDescriptor_t exCommunicator, custatevecExResourceManagerDescriptor_t resourceManager )#

Create multi-process distributed state vector.

This function creates a multi-process distributed state vector instance according to the svConfig argument and returns the instance to the stateVector argument. The svConfig should be created by custatevecExConfigureStateVectorMultiProcess(). The state vector is distributed across multiple processes. Each process owns a single device to allocate one sub state vector.

The stream argument specifies the CUDA stream for this process, corresponding to the fact that one process manages one device. All CUDA calls in this process are serialized on this stream and synchronized by calling custatevecExStateVectorSynchronize(). A null pointer is treated as the CUDA default stream.

The exCommunicator argument specifies a custatevecExCommunicatorDescriptor_t instance that is created by custatevecExCommunicatorCreate().

The resourceManager argument specifies the resource manager.

This API returns CUSTATEVEC_STATUS_INVALID_CONFIGURATION if an argument specified by custatevecExConfigureStateVectorMultiProcess() is invalid, such as specifying deviceId that does not exist, memorySharingMethod is not available on the system, or the number of processes does not match the configuration.

The number of processes must be a power-of-two number greater than 1. The number of processes must equal (1 << numGlobalBits) where numGlobalBits is the sum of all values in numGlobalIndexBitsPerLayer as specified in custatevecExConfigureStateVectorMultiProcess().

This API returns CUSTATEVEC_STATUS_NOT_SUPPORTED if no CUDA-capable GPU devices are found in the system.

Note

Custom resource manager is not enabled in this release. Please pass nullptr to the resourceManager argument. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.

Parameters:

stateVector – [out] a host pointer to the variable that receives state vector instance
svConfig – [in] state vector configuration created by custatevecExConfigureStateVectorMultiProcess
stream – [in] CUDA stream for this process
exCommunicator – [in] communicator descriptor for inter-process communication
resourceManager – [in] resource manager

`custatevecExStateVectorDestroy`#

custatevecStatus_t custatevecExStateVectorDestroy( custatevecExStateVectorDescriptor_t stateVector )#

Destroy state vector instance.

This function destroys state vector instance.

Parameters:: stateVector – [in] state vector instance

`custatevecExStateVectorGetProperty`#

custatevecStatus_t custatevecExStateVectorGetProperty( const custatevecExStateVectorDescriptor_t stateVector, custatevecExStateVectorProperty_t property, void *value, size_t sizeInBytes )#

Retrieve state vector properties.

This function retrieves state vector properties. The property argument specifies one of the properties, and the property value is returned to the host buffer specified by the value argument.

The following is the table for properties and corresponding data types.

custatevecExStateVectorProperty_t	data type
CUSTATEVEC_EX_SV_PROP_DISTRIBUTION_TYPE	custatevecExStateVectorDistributionType_t
CUSTATEVEC_EX_SV_PROP_DATA_TYPE	cudaDataType_t
CUSTATEVEC_EX_SV_PROP_NUM_WIRES	int32_t
CUSTATEVEC_EX_SV_PROP_WIRE_ORDERING	int32_t[]
CUSTATEVEC_EX_SV_PROP_NUM_LOCAL_WIRES	int32_t
CUSTATEVEC_EX_SV_PROP_NUM_DEVICE_SUBSVS	int32_t
CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES	int32_t[]

Each property enum returns the values as described below.

CUSTATEVEC_EX_SV_PROP_DISTRIBUTION_TYPE returns the distribution type of state vector.
CUSTATEVEC_EX_SV_PROP_DATA_TYPE returns the data type of state vector. The value is CUDA_C_32F or CUDA_C_64F.
CUSTATEVEC_EX_SV_PROP_NUM_WIRES returns the number of wires.
CUSTATEVEC_EX_SV_PROP_WIRE_ORDERING returns the wire ordering of state vector as int32_t array. The length of array is the number of wires.
CUSTATEVEC_EX_SV_PROP_NUM_LOCAL_WIRES returns the number of local wires, that is, the wires local to the sub state vector.
CUSTATEVEC_EX_SV_PROP_NUM_DEVICE_SUBSVS returns the number of sub state vectors placed on device.
CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES returns the array of the sub state vector indices placed on device(s). The array length is the number of the sub state vectors placed on device.

The sizeInBytes argument specifies the byte size of the value buffer and should be equal to or larger than the required byte size. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.

Parameters:

stateVector – [in] state vector instance
property – [in] a value of custatevecExStateVectorProperty_t
value – [out] host pointer to a host buffer that receives the value of the specified property
sizeInBytes – [in] byte size of the value buffer

`custatevecExStateVectorSetMathMode`#

custatevecStatus_t custatevecExStateVectorSetMathMode( custatevecExStateVectorDescriptor_t stateVector, custatevecMathMode_t mode )#

Set the compute precision mode for a state vector instance.

This function sets the compute precision mode for the specified state vector instance. Each state vector can have its own compute precision mode independently.

The compute precision mode controls the precision and performance characteristics of mathematical operations performed on the state vector.

The default math mode for the cuStateVec Ex API is CUSTATEVEC_MATH_MODE_ALLOW_FP32_EMULATED_BF16X9. To disable the use of BF16x9 floating point emulation, set the mode to CUSTATEVEC_MATH_MODE_DISALLOW_FP32_EMULATED_BF16X9.

Parameters:

stateVector – [inout] state vector instance
mode – [in] Compute precision mode as defined by custatevecMathMode_t.

`custatevecExStateVectorSetZeroState`#

custatevecStatus_t custatevecExStateVectorSetZeroState( custatevecExStateVectorDescriptor_t stateVector )#

Set the zero state to state vector.

This function sets the zero state (|0000…00>).

Parameters:: stateVector – [inout] state vector instance

`custatevecExStateVectorGetState`#

custatevecStatus_t custatevecExStateVectorGetState( const custatevecExStateVectorDescriptor_t stateVector, void *state, cudaDataType_t dataType, custatevecIndex_t begin, custatevecIndex_t end, int32_t maxNumConcurrentCopies )#

Copy state vector elements to host buffer.

State vector elements in [begin, end) are copied to the host buffer specified by the state argument. The data type specified by the dataType argument should be equal to that specified when configuring the state vector. The host buffer should be large enough to hold the copied elements.

State vector elements in the range [begin, end) should be accessible in the current process. For single-device state vector, the whole state vector is allocated in a single process. For multi-device state vector, the state vector is sliced and distributed to multiple devices. However, all state vector elements are accessible in the process. Thus, [begin, end) is in the range of [0, svSize). For multi-process state vectors, the state vector is distributed to multiple processes. Each process has one process-local slice that is called sub-state vector. The range must be within: [(subSV_index * subSV_size), (subSV_index + 1) * subSV_size), where subSV_index is the sub-state vector index and subSV_size is the sub-state vector size. custatevecExStateVectorGetProperty with CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES returns the sub-state vector indices allocated in the process.

The dataType argument should be identical to that of the state vector instance specified by the stateVector argument. Otherwise, this API returns CUSTATEVEC_STATUS_INVALID_VALUE.

The maxNumConcurrentCopies argument specifies the max number of concurrent copies that can be utilized during the copy of state vector elements. The actual number of concurrent copies is implementation defined.

Note

The call to custatevecExStateVectorGetState() can be asynchronous. Please call custatevecExStateVectorSynchronize() to complete the copying of state vector elements.

Parameters:

stateVector – [in] state vector instance
state – [out] pointer to a host buffer that receives state vector elements
dataType – [in] dataType of the state vector elements
begin – [in] index of state vector element where the copy begins
end – [in] index of state vector element where the copy ends
maxNumConcurrentCopies – [in] Max number of parallel copies.

`custatevecExStateVectorSetState`#

custatevecStatus_t custatevecExStateVectorSetState( custatevecExStateVectorDescriptor_t stateVector, const void *state, cudaDataType_t dataType, custatevecIndex_t begin, custatevecIndex_t end, int32_t maxNumConcurrentCopies )#

Set complex value array on host to state vector.

Complex values given by the state argument are copied to the specified state vector. The copy range in the state vector index is [begin, end). The data type of the complex values should be equal to that specified when configuring the state vector.

Note

The call to custatevecExStateVectorSetState() can be asynchronous. Please call custatevecExStateVectorSynchronize() to complete the copying of state vector elements.

Parameters:

stateVector – [inout] state vector instance
state – [in] pointer to a complex vector on host
dataType – [in] dataType of the state vector elements
begin – [in] index of state vector element where the copy begins
end – [in] index of state vector element where the copy ends
maxNumConcurrentCopies – [in] Max number of parallel copies.

`custatevecExStateVectorReassignWireOrdering`#

custatevecStatus_t custatevecExStateVectorReassignWireOrdering( custatevecExStateVectorDescriptor_t stateVector, const int32_t *wireOrdering, int32_t wireOrderingLen )#

Reassign wire ordering to state vector.

Reassign (overwrite) wire ordering of the given state vector. The elements given by the wireOrdering argument should be in [0, numWires) where numWires represents the number of wires of the state vector. Otherwise this API returns CUSTATEVEC_STATUS_INVALID_VALUE. The wireOrderingLen argument represents the length of wire ordering. The value should match the number of wires. Otherwise this API returns CUSTATEVEC_STATUS_INVALID_VALUE.

Parameters:

stateVector – [inout] state vector instance
wireOrdering – [in] the pointer to an integer host array that holds wire ordering
wireOrderingLen – [in] the length of wire ordering

`custatevecExStateVectorPermuteIndexBits`#

custatevecStatus_t custatevecExStateVectorPermuteIndexBits( custatevecExStateVectorDescriptor_t stateVector, const int32_t *permutation, int32_t permutationLen, custatevecExPermutationType_t permutationType )#

Permute index bits and wires of state vector.

Permutes index bits of state vector, and the wire ordering is accordingly updated to reflect the permuted index bit ordering.

Permutation is specified by the permutation argument as an integer array. The permutationLen argument specifies the length of the permutation. The function can apply two types of permutation, scatter or gather, which is specified by the permutationType argument.

The wire ordering is updated as defined in the following formulas.

// When CUSTATEVEC_EX_PERMUTATION_SCATTER specified.
dstWires[permutation[idx]] = srcWires[idx]

// When CUSTATEVEC_EX_PERMUTATION_GATHER specified.
dstWires[idx] = srcWires[permutation[idx]]

By definition, the elements of permutation array should be integers in [0, numWires - 1] where numWires represents the number of wires of the specified state vector instance. If CUSTATEVEC_EX_PERMUTATION_SCATTER is specified to the permutationType, the permutation length should be identical to the number of wires. If CUSTATEVEC_EX_PERMUTATION_GATHER is specified to the permutationType and permutationLen is smaller than the number of wires in state vector, wires that don’t appear in the permutation argument are sorted and appended to the permutation array.

Ex. numWires = 5, permutation = {0, 2, 3}, permutationLen = 3. Wires {1, 4} don’t appear. Those two wires are sorted and appended to the permutation. Therefore, the complemented permutation is {0, 2, 3, 1, 4}.

Parameters:

stateVector – [inout] StateVector instance
permutation – [in] a host pointer to an integer array specifying the permutation
permutationLen – [in] length of the permutation
permutationType – [in] permutation type (scatter or gather)

`custatevecExStateVectorGetResourcesFromDeviceSubSV`#

custatevecStatus_t custatevecExStateVectorGetResourcesFromDeviceSubSV(

custatevecExStateVectorDescriptor_t stateVector,

int32_t subSVIndex,

int32_t *deviceId,

void **d_subSV,

cudaStream_t *stream,

custatevecHandle_t *handle

)#

Get resources from device sub state vector.

Get the computing resource associated with the device state vector. The subSVIndex argument specifies the sub state vector index. The deviceId and d_subSV arguments return the device id and the device memory pointer of the specified sub state vector, respectively. The stream and handle arguments return the CUDA stream and the cuStateVec handle. All CUDA calls in custatevecEx API are serialized on the returned stream. The returned cuStateVec handle can be passed to cuStateVec APIs to operate on the returned device pointer.

The number of the device sub state vectors is retrieved by calling custatevecExStateVectorGetProperty() API with CUSTATEVEC_EX_SV_PROP_NUM_DEVICE_SUBSVS specified as the property name, and the sub state vector indices are retrieved by specifying CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES, respectively. A single-device state vector always has one device sub-state vector.

The deviceId, d_subSV, and stream arguments must not be null pointers. Otherwise, this API returns CUSTATEVEC_STATUS_INVALID_VALUE. A null pointer can be passed to the handle argument. For this case, the cuStateVec handle will not be returned.

Parameters:

stateVector – [in] StateVector instance
subSVIndex – [in] sub state vector index
deviceId – [out] device id
d_subSV – [out] a host pointer that receives the specified sub state vector pointer.
stream – [out] a host pointer to CUDA stream
handle – [out] a host pointer to custatevecHandle_t

`custatevecExStateVectorGetResourcesFromDeviceSubSVView`#

custatevecStatus_t custatevecExStateVectorGetResourcesFromDeviceSubSVView(

const custatevecExStateVectorDescriptor_t stateVector,

int32_t subSVIndex,

int32_t *deviceId,

const void **d_subSV,

cudaStream_t *stream,

custatevecHandle_t *handle

)#

Get resources from device sub state vector view.

This API works almost the same as custatevecExStateVectorGetResourcesFromDeviceSubSV() except that the returned device pointer is immutable.

Please refer to the documentation of custatevecExStateVectorGetResourcesFromDeviceSubSV() API for the descriptions of the arguments.

Parameters:

stateVector – [in] StateVector instance
subSVIndex – [in] sub state vector index
deviceId – [out] device id
d_subSV – [out] a host pointer that receives the specified sub state vector pointer.
stream – [out] a host pointer to CUDA stream
handle – [out] a host pointer to custatevecHandle_t

`custatevecExStateVectorSynchronize`#

custatevecStatus_t custatevecExStateVectorSynchronize( custatevecExStateVectorDescriptor_t stateVector )#

Flush all operations and synchronize.

This function flushes all operations issued before the call of this function and synchronizes all streams associated with the specified state vector instance. For multi-process state vectors, a barrier among processes will be issued in addition to the synchronization in the current process.

Parameters:: stateVector – [in] state vector instance

Simulator API#

`custatevecExAbs2SumArray`#

custatevecStatus_t custatevecExAbs2SumArray( custatevecExStateVectorDescriptor_t stateVector, double *abs2sum, const int32_t *outputOrdering, int32_t outputOrderingLen, const int32_t *maskBitString, const int32_t *maskWireOrdering, int32_t maskLen )#

Calculate abs2sum array for a given set of wires.

Calculates an array of sums of squared absolute values of state vector elements. The abs2sum array can be on host or device. The tensor ordering of the abs2sum array is specified by the outputOrdering and the outputOrderingLen arguments. Unspecified wires are folded (summed up).

The maskBitString, maskWireOrdering and maskLen arguments set a bit string to mask the state vector. The abs2sum array is calculated by using state vector elements that match the mask bit string. If the maskLen argument is 0, null pointers can be specified to the maskBitString and maskWireOrdering arguments, and all state vector elements are used for calculation.

By definition, all values in outputOrdering and maskWireOrdering arguments should be in [0, numWires). The outputOrdering and maskWireOrdering arguments should not contain overlapping or duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.

The empty outputOrdering can be specified to calculate the norm of state vector. In this case, 0 is passed to the outputOrderingLen argument and the outputOrdering argument can be a null pointer.

For distributed state vector configurations, the abs2sum argument should point to an array on the host.

Note

Since the size of abs2sum array is proportional to \( 2^{outputOrderingLen} \) , the max length of outputOrdering depends on the amount of available memory and maskLen.

Parameters:

stateVector – [in] StateVector instance
abs2sum – [out] pointer to a host or device array of sums of squared absolute values
outputOrdering – [in] pointer to a host array of output tensor ordering
outputOrderingLen – [in] the length of outputOrdering
maskBitString – [in] pointer to a host array for a bit string to specify mask bits
maskWireOrdering – [in] pointer to a host array that specifies the wire ordering of maskBitString
maskLen – [in] the length of mask

`custatevecExMeasure`#

custatevecStatus_t custatevecExMeasure( custatevecExStateVectorDescriptor_t stateVector, custatevecIndex_t *bitString, const int32_t *bitStringOrdering, int32_t bitStringOrderingLen, double randnum, custatevecCollapseOp_t collapse, const void *reserved )#

Perform qubit measurements.

This function executes multiple single qubit measurements with a single call and returns a bit string that represents the measurement outcomes. The bitStringOrdering argument specifies wires to be measured.

The measurement result is stored in bitString as a 64-bit integer bit mask. The ordering of bitString is specified by the bitStringOrdering and bitStringOrderingLen arguments. The idx-th bit of bitString corresponds to the measurement outcome of bitStringOrdering[idx].

By definition, all values in bitStringOrdering arguments should be in [0, numWires). The bitStringOrdering argument should not contain duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.

The collapse argument specifies the operation applied for the state vector.

If CUSTATEVEC_COLLAPSE_NONE is specified, this function only returns the measured bit string without modifying the state vector.

If CUSTATEVEC_COLLAPSE_NORMALIZE_AND_ZERO is specified, this function collapses the state vector.

If CUSTATEVEC_COLLAPSE_RESET is specified, the state vector is collapsed as CUSTATEVEC_COLLAPSE_NORMALIZE_AND_ZERO does. Then, the measurement outcome is checked. If the measurement outcome for a specified wire is |1>, the wire is flipped (reset) to |0>. Otherwise, the state vector is not modified.

If a random number is not in [0, 1), this function returns CUSTATEVEC_STATUS_INVALID_VALUE. At least one wire should be specified, otherwise this function returns CUSTATEVEC_STATUS_INVALID_VALUE.

Parameters:

stateVector – [inout] StateVector instance
bitString – [out] host pointer that receives the measured bit string
bitStringOrdering – [in] pointer to a host array of bit string ordering
bitStringOrderingLen – [in] length of bitStringOrdering
randnum – [in] random number, [0, 1).
collapse – [in] Collapse operation
reserved – [in] Reserved argument. A null pointer should be passed.

`custatevecExSample`#

custatevecStatus_t custatevecExSample( custatevecExStateVectorDescriptor_t stateVector, custatevecIndex_t *bitStrings, const int32_t *bitStringOrdering, int32_t bitStringOrderingLen, const double *randnums, int32_t numShots, custatevecSamplerOutput_t output, const void *reserved )#

Sample bit strings from the state vector.

This function performs sampling. The bitStringOrdering and bitStringOrderingLen arguments specify wires to be sampled. Sampled bit strings are represented as an array of custatevecIndex_t and are stored to the host memory buffer that the bitStrings argument points to.

The randnums argument is an array of user-generated random numbers whose length is numShots. The range of random numbers should be in [0, 1). A random number given by the randnums argument is clipped to [0, 1) if it is not in [0, 1).

The output argument specifies the order of sampled bit strings:

If CUSTATEVEC_SAMPLER_OUTPUT_RANDNUM_ORDER is specified, the order of sampled bit strings is the same as that in the randnums argument.
If CUSTATEVEC_SAMPLER_OUTPUT_ASCENDING_ORDER is specified, bit strings are returned in the ascending order.

Parameters:

stateVector – [in] State vector instance
bitStrings – [out] pointer to a host array to store sampled bit strings
bitStringOrdering – [in] pointer to a host array of bit string ordering for sampling
bitStringOrderingLen – [in] length of bitStringOrdering
randnums – [in] pointer to an array of random numbers
numShots – [in] the number of shots
output – [in] the order of sampled bit strings
reserved – [in] Reserved argument. A null pointer should be passed.

`custatevecExApplyMatrix`#

custatevecStatus_t custatevecExApplyMatrix( custatevecExStateVectorDescriptor_t stateVector, const void *matrix, cudaDataType_t matrixDataType, custatevecExMatrixType_t exMatrixType, custatevecMatrixLayout_t layout, int32_t adjoint, const int32_t *targets, int32_t numTargets, const int32_t *controls, const int32_t *controlBitValues, int32_t numControls )#

Apply gate matrix.

Apply gate matrix for state vector.

The matrix argument is a host or device pointer that points to matrix element array. For distributed state vector configurations, the matrix argument should be a host pointer. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE. The matrixDataType argument specifies the data type of matrix elements. The value should be CUDA_C_32F or CUDA_C_64F for complex 64 or complex 128 type, respectively.

The exMatrixType argument specifies the matrix types, dense (square), diagonal or anti-diagonal. The memory layout of the matrix is row- or column-major, which is specified by the layout argument.

The matrix type is dense (square) if CUSTATEVEC_EX_MATRIX_DENSE is specified. The matrix argument points to a buffer that holds dense matrix as two-dimensional array. The matrix dimension is ( \(2^\text{numTargets} \times 2^\text{numTargets}\) ). The memory layout follows the specification of the layout argument. The layout is row-major if CUSTATEVEC_MATRIX_LAYOUT_ROW, or column-major if CUSTATEVEC_MATRIX_LAYOUT_COL is specified.

The matrix type is diagonal if CUSTATEVEC_EX_MATRIX_DIAGONAL is specified. The matrix argument points to a buffer that accommodates a complex vector of diagonal elements. The vector length is identical to the matrix dimension, ( \(2^\text{numTargets}\) ). The memory layout of the diagonal elements is identical for row- and column-major layouts. Thus, the memory layout specification by the layout argument is ignored.

The matrix type is anti-diagonal if CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL is specified. The matrix argument points to a buffer that accommodates a complex vector of anti-diagonal elements. The vector length is identical to the matrix dimension, ( \(2^\text{numTargets}\) ). The memory layout is specified by the layout argument expressed as shown below,

elements[idx] = matrix(idx, dim - (idx + 1))  // row-major layout

elements[idx] = matrix(dim - (idx + 1), idx)  // col-major layout

by using elements[idx] for the idx-th anti-diagonal element and matrix(row, col) for the matrix element at (row, col).

The targets and controls arguments specify target and control wires in the state vector. The controlBitValues argument specifies bit values of control wires. The ordering of controlBitValues is specified by the controls argument. If a null pointer is specified to this argument, all control bit values are set to 1.

By definition, all target and control values should be in [0, numWires). Wires in targets and controls arguments should not overlap or have duplicates. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.

For distributed state vector configurations, the numTargets argument should be equal to or less than numDeviceWires when exMatrixType is CUSTATEVEC_EX_MATRIX_DENSE or CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL. If a larger matrix is passed, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.

Parameters:

stateVector – [inout] state vector instance
matrix – [in] pointer to a buffer that holds matrix elements
matrixDataType – [in] data type of matrix
exMatrixType – [in] enumerator specifying the matrix type
layout – [in] enumerator specifying the matrix layout
adjoint – [in] apply adjoint of matrix
targets – [in] pointer to a host array of target wires
numTargets – [in] the number of target wires
controls – [in] pointer to a host array of control wires
controlBitValues – [in] pointer to a host array of control bit values
numControls – [in] the number of control wires

`custatevecExApplyPauliRotation`#

custatevecStatus_t custatevecExApplyPauliRotation( custatevecExStateVectorDescriptor_t stateVector, double theta, const custatevecPauli_t *paulis, const int32_t *targets, int32_t numTargets, const int32_t *controls, const int32_t *controlBitValues, int32_t numControls )#

Apply the exponential of a multi-qubit Pauli operator.

Apply exponential of a tensor product of Pauli operators, \( e^{i \theta P} \), where \(P\) is the tensor product \(P = paulis[0] \otimes paulis[1] \otimes \cdots \otimes paulis[numTargets-1]\) acting on the wires specified by the targets argument. The paulis and numTargets arguments specify the Pauli operators and their count.

At least one target and a corresponding Pauli basis should be specified.

The controls and numControls arguments specify the control wires in the state vector.

By definition, all target and control values should be in [0, numWires). The targets and controls arguments should not contain overlapping or duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.

The controlBitValues argument specifies bit values of control wires. The ordering of controlBitValues is specified by the controls argument. If a null pointer is specified to this argument, all control bit values are set to 1.

Parameters:

stateVector – [inout] state vector instance
theta – [in] theta
paulis – [in] host pointer to custatevecPauli_t array
targets – [in] pointer to a host array of target wires
numTargets – [in] the number of target wires
controls – [in] pointer to a host array of control wires
controlBitValues – [in] pointer to a host array of control bit values
numControls – [in] the number of control wires

`custatevecExComputeExpectationOnPauliBasis`#

custatevecStatus_t custatevecExComputeExpectationOnPauliBasis(

custatevecExStateVectorDescriptor_t stateVector,

double *expectationValues,

const custatevecPauli_t **pauliOperatorArrays,

int32_t numPauliOperatorArrays,

const int32_t **basisWiresArray,

const int32_t *numBasisWiresArray

)#

Compute expectation values for a batch of (multi-qubit) Pauli operators.

This function computes multiple expectation values for given sequences of Pauli operators by a single call.

A single Pauli operator sequence, pauliOperators, is represented by using an array of custatevecPauli_t. The basis wires on which these Pauli operators are acting are represented by an array of wires.

The length of pauliOperators and basisWires are the same and specified by numBasisWires.

The number of Pauli operator sequences is specified by the numPauliOperatorArrays argument.

Multiple sequences of Pauli operators are represented in the form of arrays of arrays in the following manners:

The pauliOperatorArrays argument is an array of arrays of custatevecPauli_t.
The basisWiresArray is an array of the wire arrays.
The numBasisWiresArray argument holds an array of the length of Pauli operator sequences and wire arrays.

By definition, all wires in each array of basisWiresArray arguments should be in [0, numWires). Each array in basisWiresArray should not contain duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.

Computed expectation values are stored in a host buffer specified by the expectationValues argument of length numPauliOperatorArrays.

This function returns CUSTATEVEC_STATUS_INVALID_VALUE if the wires specified for a Pauli operator sequence have duplicates and/or point to a wire that does not exist.

This function accepts empty Pauli operator sequence to get the norm of the state vector.

Parameters:

stateVector – [in] state vector instance
expectationValues – [out] pointer to a host array to store expectation values
pauliOperatorArrays – [in] pointer to a host array of Pauli operator arrays
numPauliOperatorArrays – [in] the number of Pauli operator arrays
basisWiresArray – [in] host array of basis wire arrays
numBasisWiresArray – [in] host array of the number of basis wires

SVUpdater API#

`custatevecExConfigureSVUpdater`#

custatevecStatus_t custatevecExConfigureSVUpdater( custatevecExDictionaryDescriptor_t *svUpdaterConfig, cudaDataType_t dataType, const custatevecExSVUpdaterConfigItem_t *configItems, int32_t numConfigItems )#

Create SVUpdater configuration.

This function creates the configuration dictionary for SVUpdater. The dataType argument specifies the value type internally used in SVUpdater. Its value should be CUDA_C_32F or CUDA_C_64F. The configItems and numConfigItems arguments are the array of custatevecExSVUpdaterConfigItem_t.

The following configuration items are available:

CUSTATEVEC_EX_SVUPDATER_CONFIG_MAX_NUM_HOST_THREADS
- Type: int32_t, acceptable range: [1, 32]
- Description: The maximum number of host threads utilized during the call to custatevecExSVUpdaterApply().
CUSTATEVEC_EX_SVUPDATER_CONFIG_DENSE_FUSION_SIZE
- Type: int32_t, acceptable range: [1, 10]
- Description: The maximum number of targets for fused dense gate matrices.
CUSTATEVEC_EX_SVUPDATER_CONFIG_DIAGONAL_FUSION_SIZE
- Type: int32_t, acceptable range: [0, 20]
- Description: The maximum number of targets for fused diagonal gate matrices. If 0 is specified, diagonal gate matrices are not fused.

The configItems should not contain duplicate items with the same name. For configurations that are not specified, system default values will be used.

The configItems can be a null pointer. In that case, numConfigItems should be set to 0.

It’s the user’s responsibility to destroy the created svUpdaterConfig object by calling custatevecExDictionaryDestroy().

Note

Default configuration is recommended for optimal performance. For CUSTATEVEC_EX_SVUPDATER_CONFIG_MAX_NUM_HOST_THREADS, values between 4 and 16 generally provide good performance. For CUSTATEVEC_EX_SVUPDATER_CONFIG_DENSE_FUSION_SIZE, values between 4 and 6 are automatically selected by the library. These values correspond to the upper limit where gate application kernels remain memory-bound rather than compute-bound, ensuring optimal performance. For CUSTATEVEC_EX_SVUPDATER_CONFIG_DIAGONAL_FUSION_SIZE, the library default is recommended. The value is automatically adjusted based on the number of qubits in the state vector.

Parameters:

svUpdaterConfig – [out] SVUpdater configuration
dataType – [in] data type used in SVUpdater
configItems – [in] host pointer to the array of Configuration items
numConfigItems – [in] the number of configuration items

`custatevecExSVUpdaterCreate`#

custatevecStatus_t custatevecExSVUpdaterCreate( custatevecExSVUpdaterDescriptor_t *svUpdater, const custatevecExDictionaryDescriptor_t svUpdaterConfig, custatevecExResourceManagerDescriptor_t resourceManager )#

Create SVUpdater.

This function creates SVUpdater instance.

Note

Custom resource manager is not enabled in this release. Please pass nullptr to the resourceManager argument. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.

Parameters:

svUpdater – [out] created SVUpdater
svUpdaterConfig – [in] SVUpdater configuration
resourceManager – [in] resource manager

`custatevecExSVUpdaterDestroy`#

custatevecStatus_t custatevecExSVUpdaterDestroy( custatevecExSVUpdaterDescriptor_t svUpdater )#

Destroy SVUpdater instance.

This function destroys SVUpdater instance.

Parameters:: svUpdater – [in] SVUpdater instance

`custatevecExSVUpdaterClear`#

custatevecStatus_t custatevecExSVUpdaterClear( custatevecExSVUpdaterDescriptor_t svUpdater )#

Clear operations queued in SVUpdater.

This function clears queued operations in SVUpdater.

Parameters:: svUpdater – [inout] SVUpdater instance

`custatevecExSVUpdaterEnqueueMatrix`#

custatevecStatus_t custatevecExSVUpdaterEnqueueMatrix( custatevecExSVUpdaterDescriptor_t svUpdater, const void *matrix, cudaDataType_t matrixDataType, custatevecExMatrixType_t exMatrixType, custatevecMatrixLayout_t layout, int32_t adjoint, const int32_t *targets, int32_t numTargets, const int32_t *controls, const int32_t *controlBitValues, int32_t numControls )#

Enqueue matrix to SVUpdater.

This function enqueues a gate matrix to SVUpdater.

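The matrix argument is a host pointer that points to matrix element array. The matrixDataType argument specifies the data type of matrix elements. The value should be CUDA_C_32F or CUDA_C_64F for complex 64 or complex 128 type, respectively. Only CUDA_C_64F is acceptable if the SVUpdater data type is set to CUDA_C_64F by calling custatevecExConfigureSVUpdater(). Both CUDA_C_64F and CUDA_C_32F are acceptable if the SVUpdater data type is set to CUDA_C_32F.

The exMatrixType argument specifies the matrix type (dense, diagonal or anti-diagonal), and the layout argument specifies the memory layout of the matrix. For an anti-diagonal matrix, the memory layout is specified by the layout argument expressed as shown below,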

elements[idx] = matrix(idx, dim - (idx + 1))  // row-major layout

elements[idx] = matrix(dim - (idx + 1), idx)  // col-major layout

by using elements[idx] for the idx-th anti-diagonal element and matrix(row, col) for the matrix element at (row, col).

The max number of targets is limited to 15 if CUSTATEVEC_EX_MATRIX_DENSE or CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL is specified, and to 30 if CUSTATEVEC_EX_MATRIX_DIAGONAL is specified. However, 10 or more targets is not recommended; the recommended number of targets for typical usage is 6 or fewer.

By definition, all target and control values should be in [0, numWires). The targets and controls arguments should not contain overlapping wires.

For distributed state vector configurations, the total number of wires involved (numTargets + numControls) should not exceed numDeviceWires or the number of local wires on each device. Note that this constraint is not validated by custatevecExSVUpdaterEnqueueMatrix(). The validation occurs later during the call to custatevecExSVUpdaterApply() where the StateVector instance is provided, and CUSTATEVEC_STATUS_INVALID_VALUE is returned if the constraint is violated.

Parameters:

svUpdater – [inout] SVUpdater instance
matrix – [in] pointer to a host buffer that holds matrix elements
matrixDataType – [in] data type of matrix
exMatrixType – [in] enumerator specifying the matrix type
layout – [in] enumerator specifying the matrix layout
adjoint – [in] apply adjoint of matrix
targets – [in] pointer to a host array of target wires
numTargets – [in] the number of target wires
controls – [in] pointer to a host array of control wires
controlBitValues – [in] pointer to a host array of control bit values
numControls – [in] the number of control wires

`custatevecExSVUpdaterEnqueueUnitaryChannel`#

custatevecStatus_t custatevecExSVUpdaterEnqueueUnitaryChannel( custatevecExSVUpdaterDescriptor_t svUpdater, const void *const *unitaries, cudaDataType_t unitariesDataType, const custatevecExMatrixType_t *exMatrixTypes, int32_t numUnitaries, custatevecMatrixLayout_t layout, const double *probabilities, const int32_t *channelWires, int32_t numChannelWires )#

Enqueue mixed unitary channel.

This function enqueues a mixed unitary channel to SVUpdater.

The unitaries argument is a host pointer to an array of unitary matrices. Each array element in the unitaries argument points to an array of matrix elements that represents a unitary matrix. The actual data type of matrix elements is specified by the unitariesDataType argument. The exMatrixTypes is a host array that specifies the matrix types of unitary matrices. The matrix types specified by exMatrixTypes determine the lengths of matrix element arrays specified by the unitaries argument. The layout argument specifies the matrix layout in the same way as described in custatevecExSVUpdaterEnqueueMatrix().

The probabilities argument is a host array that represents the probabilities associated with the specified unitary matrices to be randomly sampled. The array length is numUnitaries. The total sum of all probabilities must be less than or equal to 1.0. When the sum is less than 1.0, the remaining probability corresponds to no transformation, implementing a probabilistic quantum channel where the state vector remains unchanged with probability (1 - sum_of_probabilities). The probabilities are used as-is for sampling without normalization in this API.

If any probability value in the probabilities array is zero or negative, CUSTATEVEC_STATUS_INVALID_VALUE is returned.

Only CUDA_C_64F is acceptable if the SVUpdater data type is set to CUDA_C_64F by calling custatevecExConfigureSVUpdater(). Both CUDA_C_64F and CUDA_C_32F are acceptable if the SVUpdater data type is set to CUDA_C_32F.

The channelWires and numChannelWires arguments specify the wires on which the sampled unitary matrix is to be applied. The max number of wires is limited to 15 if CUSTATEVEC_EX_MATRIX_DENSE or CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL is specified, and to 30 if CUSTATEVEC_EX_MATRIX_DIAGONAL is specified. However, 10 or more wires is not recommended; the recommended number of wires for typical usage is 6 or fewer.

For distributed state vector configurations, the numChannelWires argument should be equal to or less than numDeviceWires, or the number of local wires on each device. Note that this constraint is not validated by custatevecExSVUpdaterEnqueueUnitaryChannel(). The validation occurs later during the call to custatevecExSVUpdaterApply() where the StateVector instance is provided, and CUSTATEVEC_STATUS_INVALID_VALUE is returned if the constraint is violated.

By definition, all wires in channelWires argument should be in [0, numWires). custatevecExSVUpdaterApply() returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.

Parameters:

svUpdater – [inout] SVUpdater instance
unitaries – [in] host pointer to an array of unitary matrix elements
unitariesDataType – [in] dataType of the specified unitary matrices
exMatrixTypes – [in] matrix types of the specified unitary matrices
numUnitaries – [in] the number of matrices.
layout – [in] layout of the specified unitary matrices
probabilities – [in] host array that holds the probabilities
channelWires – [in] wires on which the sampled unitary matrix is applied
numChannelWires – [in] the number of wires

`custatevecExSVUpdaterEnqueueGeneralChannel`#

custatevecStatus_t custatevecExSVUpdaterEnqueueGeneralChannel( custatevecExSVUpdaterDescriptor_t svUpdater, const void *const *matrices, cudaDataType_t matrixDataType, const custatevecExMatrixType_t *exMatrixTypes, int32_t numMatrices, custatevecMatrixLayout_t layout, const int32_t *channelWires, int32_t numChannelWires )#

Enqueue general channel.

This function enqueues a general channel to SVUpdater.

The matrices argument is a host pointer to an array of matrices that are assumed to be Kraus operators defining the general quantum channel. Each array element in the matrices argument points to an array of matrix elements that represents a matrix. The actual data type of matrix elements is specified by the matrixDataType argument. The numMatrices argument specifies the number of matrices. The exMatrixTypes is a host array that specifies the matrix types of the matrices. The matrix types specified by exMatrixTypes determine the lengths of matrix element arrays specified by the matrices argument. The layout argument specifies the matrix layout in the same way as described in custatevecExSVUpdaterEnqueueMatrix(). If a general channel is queued to SVUpdater, state vector will be normalized, and its norm will be 1 after the call of custatevecExSVUpdaterApply().

The channelWires and numChannelWires arguments specify the wires on which the general channel is to be applied. The max number of wires is limited to 15 for all matrix types. However, 10 or more wires is not recommended; the recommended number of wires for typical usage is 6 or fewer.

For distributed state vector configurations, the numChannelWires argument should be equal to or less than numDeviceWires, or the number of local wires on each device. Note that this constraint is not validated by custatevecExSVUpdaterEnqueueGeneralChannel(). The validation occurs later during the call to custatevecExSVUpdaterApply() where the StateVector instance is provided, and CUSTATEVEC_STATUS_INVALID_VALUE is returned if the constraint is violated.

By definition, all wires in channelWires argument should be in [0, numWires). custatevecExSVUpdaterApply() returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.

It is the user’s responsibility to provide a complete set of Kraus operators. The completeness of Kraus operators is validated immediately during custatevecExSVUpdaterEnqueueGeneralChannel() by computing the minimum expectation value using eigenvalues of the Kraus operators. If an incomplete set of Kraus operators is provided, custatevecExSVUpdaterEnqueueGeneralChannel() returns CUSTATEVEC_STATUS_NUMERICAL_ERROR.

Parameters:

svUpdater – [inout] SVUpdater instance
matrices – [in] host pointer to an array of matrix elements
matrixDataType – [in] dataType of the matrices
exMatrixTypes – [in] matrix types of the specified matrices
numMatrices – [in] the number of matrices.
layout – [in] layout of the matrices
channelWires – [in] wires on which the general channel is applied
numChannelWires – [in] the number of wires

`custatevecExSVUpdaterGetMaxNumRequiredRandnums`#

custatevecStatus_t custatevecExSVUpdaterGetMaxNumRequiredRandnums( custatevecExSVUpdaterDescriptor_t svUpdater, int32_t *maxNumRequiredRandnums )#

Get the max number of required random numbers.

Get the max required number of random numbers to call custatevecExSVUpdaterApply().

Parameters:

svUpdater – [in] SVUpdater instance
maxNumRequiredRandnums – [out] the max required number of random numbers.

`custatevecExSVUpdaterApply`#

custatevecStatus_t custatevecExSVUpdaterApply( custatevecExSVUpdaterDescriptor_t svUpdater, custatevecExStateVectorDescriptor_t stateVector, const double *randnums, int32_t numRandnums )#

Apply queued operations.

Apply operations queued in SVUpdater to the specified state vector.

The data type of the state vector should be identical to that of the SVUpdater, as specified by calling custatevecExConfigureSVUpdater(). Otherwise, this API returns CUSTATEVEC_STATUS_INVALID_VALUE. The randnums and numRandnums arguments pass the array of random numbers that is used to apply noise channels. Each random number in the randnums array must be in the range [0, 1). The number of required random numbers is retrieved by calling custatevecExSVUpdaterGetMaxNumRequiredRandnums().

It is the user’s responsibility to generate the required number of random numbers in the valid range. If a given random number is not in the range, the value is clipped to the range.

This function performs validation of queued operations against the provided state vector and may return the following error codes:

CUSTATEVEC_STATUS_INVALID_WIRE: This error is returned if any queued operation references wire indices that do not exist in the provided state vector. All wires from enqueued operations (targets, controls, and channelWires) are validated against the wire range [0, numWires) of the specified state vector instance during the call to this API.
CUSTATEVEC_STATUS_INVALID_VALUE: For distributed state vector configurations, this error is returned if any queued operation has a total number of wires (targets + controls for matrices, or channelWires for channels) that exceeds the number of local wires on each device, as described in the documentation of custatevecExSVUpdaterEnqueueMatrix(), custatevecExSVUpdaterEnqueueUnitaryChannel(), and custatevecExSVUpdaterEnqueueGeneralChannel(). This check is performed here because the StateVector instance is available at this point.
CUSTATEVEC_STATUS_NUMERICAL_ERROR: This error can occur in two scenarios: (1) When the state vector norm becomes close to zero during the application of operations, making normalization impossible. (2) For general channels, if the completeness of the Kraus operators is violated, as determined by computing expectation values during the application process.

Parameters:

svUpdater – [inout] SVUpdater instance
stateVector – [inout] state vector instance
randnums – [in] a host pointer to an array of random numbers in the range [0, 1)
numRandnums – [in] the number of random numbers

cuStateVec Ex API Reference#

cuStateVec Ex data types#

Opaque data structures#

custatevecExDictionaryDescriptor_t#

custatevecExCommunicatorDescriptor_t#

custatevecExStateVectorDescriptor_t#

custatevecExSVUpdaterDescriptor_t#

custatevecExResourceManagerDescriptor_t#

Enumerators#

custatevecExCommunicatorStatus_t#

custatevecExStateVectorCapability_t#

custatevecExStateVectorDistributionType_t#

custatevecExGlobalIndexBitClass_t#

custatevecExStateVectorProperty_t#

custatevecExPermutationType_t#

custatevecExMatrixType_t#

custatevecExSVUpdaterConfigName_t#

custatevecExMemorySharingMethod_t#

Structures#

custatevecExSVUpdaterConfigItem_t#

cuStateVec Ex functions#

Dictionary API#

custatevecExDictionaryDestroy#

Communicator API#

custatevecExCommunicatorInitialize#

custatevecExCommunicatorFinalize#

custatevecExCommunicatorGetSizeAndRank#

custatevecExCommunicatorCreate#

custatevecExCommunicatorDestroy#

StateVector API#

custatevecExConfigureStateVectorSingleDevice#

custatevecExConfigureStateVectorMultiDevice#

custatevecExConfigureStateVectorMultiProcess#

custatevecExStateVectorCreateSingleProcess#

custatevecExStateVectorCreateMultiProcess#

custatevecExStateVectorDestroy#

custatevecExStateVectorGetProperty#

custatevecExStateVectorSetMathMode#

custatevecExStateVectorSetZeroState#

custatevecExStateVectorGetState#

custatevecExStateVectorSetState#

custatevecExStateVectorReassignWireOrdering#

custatevecExStateVectorPermuteIndexBits#

custatevecExStateVectorGetResourcesFromDeviceSubSV#

custatevecExStateVectorGetResourcesFromDeviceSubSVView#

custatevecExStateVectorSynchronize#

Simulator API#

custatevecExAbs2SumArray#

custatevecExMeasure#

custatevecExSample#

custatevecExApplyMatrix#

custatevecExApplyPauliRotation#

custatevecExComputeExpectationOnPauliBasis#

SVUpdater API#

custatevecExConfigureSVUpdater#

custatevecExSVUpdaterCreate#

custatevecExSVUpdaterDestroy#

custatevecExSVUpdaterClear#

custatevecExSVUpdaterEnqueueMatrix#

custatevecExSVUpdaterEnqueueUnitaryChannel#

custatevecExSVUpdaterEnqueueGeneralChannel#

custatevecExSVUpdaterGetMaxNumRequiredRandnums#

custatevecExSVUpdaterApply#

`custatevecExDictionaryDescriptor_t`#

`custatevecExCommunicatorDescriptor_t`#

`custatevecExStateVectorDescriptor_t`#

`custatevecExSVUpdaterDescriptor_t`#

`custatevecExResourceManagerDescriptor_t`#

`custatevecExCommunicatorStatus_t`#

`custatevecExStateVectorCapability_t`#

`custatevecExStateVectorDistributionType_t`#

`custatevecExGlobalIndexBitClass_t`#

`custatevecExStateVectorProperty_t`#

`custatevecExPermutationType_t`#

`custatevecExMatrixType_t`#

`custatevecExSVUpdaterConfigName_t`#

`custatevecExMemorySharingMethod_t`#

`custatevecExSVUpdaterConfigItem_t`#

`custatevecExDictionaryDestroy`#

`custatevecExCommunicatorInitialize`#

`custatevecExCommunicatorFinalize`#

`custatevecExCommunicatorGetSizeAndRank`#

`custatevecExCommunicatorCreate`#

`custatevecExCommunicatorDestroy`#

`custatevecExConfigureStateVectorSingleDevice`#

`custatevecExConfigureStateVectorMultiDevice`#

`custatevecExConfigureStateVectorMultiProcess`#

`custatevecExStateVectorCreateSingleProcess`#

`custatevecExStateVectorCreateMultiProcess`#

`custatevecExStateVectorDestroy`#

`custatevecExStateVectorGetProperty`#

`custatevecExStateVectorSetMathMode`#

`custatevecExStateVectorSetZeroState`#

`custatevecExStateVectorGetState`#

`custatevecExStateVectorSetState`#

`custatevecExStateVectorReassignWireOrdering`#

`custatevecExStateVectorPermuteIndexBits`#

`custatevecExStateVectorGetResourcesFromDeviceSubSV`#

`custatevecExStateVectorGetResourcesFromDeviceSubSVView`#

`custatevecExStateVectorSynchronize`#

`custatevecExAbs2SumArray`#

`custatevecExMeasure`#

`custatevecExSample`#

`custatevecExApplyMatrix`#

`custatevecExApplyPauliRotation`#

`custatevecExComputeExpectationOnPauliBasis`#

`custatevecExConfigureSVUpdater`#

`custatevecExSVUpdaterCreate`#

`custatevecExSVUpdaterDestroy`#

`custatevecExSVUpdaterClear`#

`custatevecExSVUpdaterEnqueueMatrix`#

`custatevecExSVUpdaterEnqueueUnitaryChannel`#

`custatevecExSVUpdaterEnqueueGeneralChannel`#

`custatevecExSVUpdaterGetMaxNumRequiredRandnums`#

`custatevecExSVUpdaterApply`#