cuStateVec Ex API Reference#
This reference describes all components of the cuStateVec Ex API.
cuStateVec Ex data types#
Opaque data structures#
custatevecExDictionaryDescriptor_t#
-
typedef struct custatevecExDictionary *custatevecExDictionaryDescriptor_t#
This descriptor holds a handle to a dictionary instance.
custatevecExCommunicatorDescriptor_t#
-
typedef struct custatevecExCommunicator_t *custatevecExCommunicatorDescriptor_t#
This descriptor holds a handle to an inter-process communication object.
This abstraction enables communication between processes for multi-process state vector operations.
custatevecExStateVectorDescriptor_t#
-
typedef struct custatevecExStateVector *custatevecExStateVectorDescriptor_t#
This descriptor holds a handle to a state vector instance.
custatevecExSVUpdaterDescriptor_t#
-
typedef struct custatevecExSVUpdater *custatevecExSVUpdaterDescriptor_t#
This descriptor holds a handle to an SVUpdater instance.
custatevecExResourceManagerDescriptor_t#
-
typedef struct custatevecExResourceManager *custatevecExResourceManagerDescriptor_t#
This descriptor holds a handle to a resource manager instance.
Note
Custom resource manager is not supported in this release.
Enumerators#
custatevecExCommunicatorStatus_t#
-
enum custatevecExCommunicatorStatus_t#
Status code returned by communicator method functions.
This status code is returned by communicator functions defined in custatevecEx_ext.h. This enum only implements the success code. Other status codes are implementation dependent.
Values:
-
enumerator CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS = 0#
Operation completed successfully.
-
enumerator CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS = 0#
custatevecExStateVectorCapability_t#
-
enum custatevecExStateVectorCapability_t#
Bitmask that specifies state vector capability. This enum is reserved for future use.
Values:
-
enumerator CUSTATEVEC_EX_SV_CAPABILITY_NONE = 0#
No capability enabled
-
enumerator CUSTATEVEC_EX_SV_CAPABILITY_NONE = 0#
custatevecExStateVectorDistributionType_t#
-
enum custatevecExStateVectorDistributionType_t#
Enum that specifies the distribution type of state vector.
Values:
-
enumerator CUSTATEVEC_EX_SV_DISTRIBUTION_SINGLE_DEVICE = 0#
State vector on single device.
-
enumerator CUSTATEVEC_EX_SV_DISTRIBUTION_MULTI_DEVICE = 1#
State vector distributed to multiple devices.
-
enumerator CUSTATEVEC_EX_SV_DISTRIBUTION_MULTI_PROCESS = 2#
State vector distributed to multiple processes.
-
enumerator CUSTATEVEC_EX_SV_DISTRIBUTION_SINGLE_DEVICE = 0#
custatevecExGlobalIndexBitClass_t#
-
enum custatevecExGlobalIndexBitClass_t#
Communication method for global index bit operations in multi-process distributions.
Operations on global index bits require data transfers. This enum specifies the communication method to use.
Values:
-
enumerator CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P = 1#
Inter-process GPUDirect P2P.
-
enumerator CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_COMMUNICATOR = 2#
Communication via custatevecExCommunicator.
-
enumerator CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P = 1#
custatevecExStateVectorProperty_t#
-
enum custatevecExStateVectorProperty_t#
Specifies the name of state vector property.
Values:
-
enumerator CUSTATEVEC_EX_SV_PROP_DISTRIBUTION_TYPE = 0#
-
enumerator CUSTATEVEC_EX_SV_PROP_DATA_TYPE = 1#
Returns cudaDataType_t.
-
enumerator CUSTATEVEC_EX_SV_PROP_NUM_WIRES = 2#
Returns int32_t.
-
enumerator CUSTATEVEC_EX_SV_PROP_WIRE_ORDERING = 3#
Returns int32_t array.
-
enumerator CUSTATEVEC_EX_SV_PROP_NUM_LOCAL_WIRES = 4#
Returns int32_t.
-
enumerator CUSTATEVEC_EX_SV_PROP_NUM_DEVICE_SUBSVS = 5#
Returns int32_t.
-
enumerator CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES = 6#
Returns int32_t array.
-
enumerator CUSTATEVEC_EX_SV_PROP_DISTRIBUTION_TYPE = 0#
custatevecExPermutationType_t#
-
enum custatevecExPermutationType_t#
Specifies the permutation type.
Values:
-
enumerator CUSTATEVEC_EX_PERMUTATION_SCATTER = 0#
Scatter permutation.
-
enumerator CUSTATEVEC_EX_PERMUTATION_GATHER = 1#
Gather permutation.
-
enumerator CUSTATEVEC_EX_PERMUTATION_SCATTER = 0#
custatevecExMatrixType_t#
-
enum custatevecExMatrixType_t#
Specifies the type of matrix.
Values:
-
enumerator CUSTATEVEC_EX_MATRIX_DENSE = 1#
Dense matrix.
-
enumerator CUSTATEVEC_EX_MATRIX_DIAGONAL = 2#
Diagonal matrix.
-
enumerator CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL = 4#
Anti-diagonal matrix.
-
enumerator CUSTATEVEC_EX_MATRIX_DENSE = 1#
custatevecExSVUpdaterConfigName_t#
-
enum custatevecExSVUpdaterConfigName_t#
Specifies the configuration argument type of SVUpdater.
Values:
-
enumerator CUSTATEVEC_EX_SVUPDATER_CONFIG_MAX_NUM_HOST_THREADS = 0#
Number of host threads, int32_t.
-
enumerator CUSTATEVEC_EX_SVUPDATER_CONFIG_DENSE_FUSION_SIZE = 1#
Dense fusion size, int32_t.
-
enumerator CUSTATEVEC_EX_SVUPDATER_CONFIG_DIAGONAL_FUSION_SIZE = 2#
Diagonal fusion size, int32_t.
-
enumerator CUSTATEVEC_EX_SVUPDATER_CONFIG_MAX_NUM_HOST_THREADS = 0#
custatevecExMemorySharingMethod_t#
-
enum custatevecExMemorySharingMethod_t#
Specifies the method to share device virtual memory among processes.
Values:
-
enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_AUTODETECT = 0#
Auto-detect.
-
enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_NONE = 1#
No P2P memory sharing.
-
enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_FABRIC_HANDLE = 2#
Use FabricHandle.
-
enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_PIDFD = 3#
Use pidfd syscalls with POSIX file descriptor.
-
enumerator CUSTATEVEC_EX_MEMORY_SHARING_METHOD_AUTODETECT = 0#
Structures#
custatevecExSVUpdaterConfigItem_t#
-
struct custatevecExSVUpdaterConfigItem_t#
Specifies the configuration item of SVUpdater.
Public Members
-
custatevecExSVUpdaterConfigName_t name#
Configuration name.
-
int32_t int32#
int32 value
-
char placeholder[32]#
Placeholder to keep 32 bytes for the value member.
-
union custatevecExSVUpdaterConfigItem_t::[anonymous] value#
Configuration value.
-
custatevecExSVUpdaterConfigName_t name#
cuStateVec Ex functions#
Dictionary API#
custatevecExDictionaryDestroy#
- custatevecStatus_t custatevecExDictionaryDestroy(
- custatevecExDictionaryDescriptor_t dictionary
Destroy dictionary instance.
custatevecExDictionaryDestroy() destroys a dictionary instance. A dictionary is an object that holds key-value pairs.
- Parameters:
dictionary – [in] dictionary descriptor instance
Communicator API#
custatevecExCommunicatorInitialize#
- custatevecStatus_t custatevecExCommunicatorInitialize(
- custatevecCommunicatorType_t communicatorType,
- const char *libraryPath,
- int *argc,
- char ***argv,
- custatevecExCommunicatorStatus_t *exCommStatus
Initialize inter-process communication.
The communicator is the abstraction of inter-process communication in cuStateVec Ex API. This function initializes the underlying inter-process communication library and prepares it for creating communicator instances via custatevecExCommunicatorCreate().
The library provides two built-in communicator implementations for Open MPI and MPICH. To use these, specify CUSTATEVEC_COMMUNICATOR_TYPE_OPENMPI or CUSTATEVEC_COMMUNICATOR_TYPE_MPICH for communicatorType. The API loads the MPI library from the path specified by libraryPath. If null, it defaults to “libmpi.so” and follows the standard library search order.
The exCommStatus argument returns the result of the call to the init() method of the underlying implementation. For built-in communicators, it returns the return value of MPI_Init() as custatevecExCommunicatorStatus_t.
The built-in implementation skips calling MPI_Init() if MPI is already initialized. In this case, CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS is returned to the exCommStatus argument.
To use other inter-process communication libraries, users can build a custom communicator. To use it, specify CUSTATEVEC_COMMUNICATOR_TYPE_EXTERNAL for communicatorType and provide the path to the custom communicator library in libraryPath. If libraryPath is null for external communicators, the API searches for the required symbols in the current process, allowing applications to provide their own communicator implementation without a separate shared library.
This API returns successfully if the specified shared object is properly loaded and the IPC library is properly initialized.
Note
This function is intended for application initialization. It should be called once per process before any other communicator operations.
Note
The communicator dynamically loads the library that provides inter-process communication features. During the lifetime of the application, only one library may be used. This API returns CUSTATEVEC_STATUS_ALREADY_INITIALIZED on successive calls after the first successful initialization. If an application directly links to an MPI library, the communicator should use the same library binary. Otherwise, this API may fail or the communicator may not function properly.
- Parameters:
communicatorType – [in] communicator type
libraryPath – [in] path to the inter-process communication library (can be null)
argc – [inout] pointer to argument count
argv – [inout] pointer to argument vector
exCommStatus – [out] pointer to the variable that receives the status from communicator’s init() method
custatevecExCommunicatorFinalize#
- custatevecStatus_t custatevecExCommunicatorFinalize(
- custatevecExCommunicatorStatus_t *exCommStatus
Finalize inter-process communication library.
This function finalizes the underlying inter-process communication library and releases all associated resources.
The exCommStatus argument returns the result of the call to the communicator provider’s finalize() method. For built-in communicators, it returns the return value of MPI_Finalize() as custatevecExCommunicatorStatus_t.
The built-in implementation skips calling MPI_Finalize() if MPI_Init() was not called by custatevecExCommunicatorInitialize(). In this case, CUSTATEVEC_EX_COMMUNICATOR_STATUS_SUCCESS is returned.
Note
This function is intended for application finalization. All communicator instances must be destroyed before calling this function.
- Parameters:
exCommStatus – [out] pointer to the variable that receives the status from communicator’s finalize() method
custatevecExCommunicatorGetSizeAndRank#
- custatevecStatus_t custatevecExCommunicatorGetSizeAndRank(
- int32_t *size,
- int32_t *rank,
- custatevecExCommunicatorStatus_t *exCommStatus
Get the global size and rank.
This function retrieves the number of processes and the rank of the calling process. These values are identical to those obtained from MPI_COMM_WORLD for MPI-based communicator.
The size argument returns the global number of processes. The rank argument returns the global rank of the calling process (0-indexed, range [0, size)). The exCommStatus argument returns the status of the communicator operations.
custatevecExCommunicatorInitialize() must be called before calling this function. If the IPC library is not initialized, this function returns CUSTATEVEC_STATUS_NOT_INITIALIZED.
- Parameters:
size – [out] pointer to the variable that receives the global number of processes
rank – [out] pointer to the variable that receives the global rank of the calling process
exCommStatus – [out] pointer to the variable that receives the status from communicator operations
custatevecExCommunicatorCreate#
- custatevecStatus_t custatevecExCommunicatorCreate(
- custatevecExCommunicatorDescriptor_t *exCommunicator
Create communicator instance.
This function creates a communicator instance using the IPC library initialized by custatevecExCommunicatorInitialize().
custatevecExCommunicatorInitialize() must be called before calling this function. If the IPC library is not initialized, this function returns CUSTATEVEC_STATUS_NOT_INITIALIZED.
- Parameters:
exCommunicator – [out] pointer to the variable that receives the created communicator instance
custatevecExCommunicatorDestroy#
- custatevecStatus_t custatevecExCommunicatorDestroy(
- custatevecExCommunicatorDescriptor_t exCommunicator
Destroy communicator instance.
This function destroys a communicator instance and releases all associated resources.
- Parameters:
exCommunicator – [in] communicator instance to destroy
StateVector API#
custatevecExConfigureStateVectorSingleDevice#
- custatevecStatus_t custatevecExConfigureStateVectorSingleDevice(
- custatevecExDictionaryDescriptor_t *svConfig,
- cudaDataType_t svDataType,
- int32_t numWires,
- int32_t numDeviceWires,
- int32_t deviceId,
- uint32_t capability
Create configuration for single device state vector.
This function creates a dictionary that holds state vector configuration for a single device state vector according to the given set of arguments. The state vector will be allocated on the single device specified by deviceId.
The numWires argument specifies the number of wires of state vector, and the numDeviceWires argument specifies the number of wires allocated on the device.
The capability argument is to enable optional features. The value is specified as a bit-wise OR of custatevecExStateVectorCapability_t. As the present version does not have any capability defined, the value should be 0.
For the present release, the same value should be specified to the numWires and numDeviceWires arguments. These two arguments are declared for a future extension.
Note
This function creates a logical configuration and does not validate actual system or hardware requirements (e.g., non-existent deviceId, required memory capacity). Validation occurs when calling custatevecExStateVectorCreateSingleProcess() to create the state vector instance.
- Parameters:
svConfig – [out] dictionary instance that holds state vector configuration
svDataType – [in] state vector data type
numWires – [in] number of wires of state vector
numDeviceWires – [in] number of wires of state vector on device
deviceId – [in] device id where the entire state vector will be allocated
capability – [in] bit mask to specify optional features of state vector
custatevecExConfigureStateVectorMultiDevice#
- custatevecStatus_t custatevecExConfigureStateVectorMultiDevice(
- custatevecExDictionaryDescriptor_t *svConfig,
- cudaDataType_t svDataType,
- int32_t numWires,
- int32_t numDeviceWires,
- const int32_t *deviceIds,
- int32_t numDevices,
- custatevecDeviceNetworkType_t networkType,
- uint32_t capability
Create configuration for multi-device state vector.
This function creates a dictionary that holds state vector configuration for multi-device state vector within a single process. The state vector is distributed across multiple devices specified by deviceIds. The specified devices should be able to communicate with each other by GPUDirect P2P.
The numDevices argument should be a power-of-two value. The following relationship should be satisfied:
numWires = log2(numDevices) + numDeviceWires
The networkType argument specifies the device interconnect topology. Use CUSTATEVEC_DEVICE_NETWORK_TYPE_SWITCH for GPUs connected by switches (e.g., via NVSwitch or PCIe switch) or CUSTATEVEC_DEVICE_NETWORK_TYPE_FULLMESH for direct all-to-all connectivity (e.g., direct NVLink mesh).
The capability argument enables optional features. The value should be 0 as no capabilities are currently defined.
Note
This function creates a logical configuration and does not validate actual system or hardware requirements (e.g., non-existent deviceIds, P2P capability, required memory capacity). Validation occurs when calling custatevecExStateVectorCreateSingleProcess() to create the state vector instance.
- Parameters:
svConfig – [out] dictionary instance that holds state vector configuration
svDataType – [in] state vector data type
numWires – [in] number of wires of state vector
numDeviceWires – [in] number of wires of state vector on each device
deviceIds – [in] host pointer to an array of device ids
numDevices – [in] number of devices
networkType – [in] device network topology type
capability – [in] bit mask to specify optional features of state vector
custatevecExConfigureStateVectorMultiProcess#
- custatevecStatus_t custatevecExConfigureStateVectorMultiProcess(
- custatevecExDictionaryDescriptor_t *svConfig,
- cudaDataType_t svDataType,
- int32_t numWires,
- int32_t numDeviceWires,
- int32_t deviceId,
- custatevecExMemorySharingMethod_t memorySharingMethod,
- const custatevecExGlobalIndexBitClass_t *globalIndexBitClasses,
- const int32_t *numGlobalIndexBitsPerLayer,
- int32_t numGlobalIndexBitLayers,
- size_t transferWorkspaceSizeInBytes,
- const void *auxConfig,
- uint32_t capability
Create configuration for multi-process distributed state vector.
This function creates a dictionary that holds state vector configuration for multi-process distributed state vector according to the given set of arguments. In this configuration, the state vector is distributed across multiple processes, with each process owning a single device to allocate one sub state vector.
The numWires argument specifies the number of wires of state vector, and the numDeviceWires argument specifies the number of wires allocated on each device. The following relationship should be satisfied, where numProcesses is the number of processes to which state vector is distributed:
numWires = log2(numProcesses) + numDeviceWires
The deviceId argument specifies the device ID where the sub state vector will be allocated. If set to -1, the device ID will be dynamically assigned during state vector creation using the formula: deviceId = processRank % numDevicesInHardwareNode, where processRank is obtained from the communicator and numDevicesInHardwareNode is the total number of CUDA devices available in the hardware node.
The memorySharingMethod argument specifies the method for sharing GPU virtual device memory between processes by inter-process GPUDirect P2P. Virtual device memory is utilized to allocate the device memory for state vector. If CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P is specified in globalIndexBitClasses, it’s required to specify an available memory sharing method. Use CUSTATEVEC_EX_MEMORY_SHARING_METHOD_AUTODETECT for automatic selection, or specify a particular method when the suitable sharing method is known. CUSTATEVEC_EX_MEMORY_SHARING_METHOD_FABRIC_HANDLE is required to enable multi-node NVLink. CUSTATEVEC_EX_MEMORY_SHARING_METHOD_PIDFD uses POSIX file descriptors to export and import virtual device memory among processes. When this method is used, all GPUs that are assigned to distributed processes should be in a single hardware node due to the limitation of POSIX file descriptor sharing.
If CUSTATEVEC_EX_GLOBAL_INDEX_BIT_CLASS_INTERPROC_P2P is not specified in globalIndexBitClasses, use CUSTATEVEC_EX_MEMORY_SHARING_METHOD_NONE, which indicates that no memory sharing is required. If another memory sharing method is specified, it will be silently ignored.
The process-to-process communication is organized in layers that correspond to network hierarchy. Each layer represents a different communication datalink (e.g., PCIe Switch, NVLink with NVSwitch) between processes with appropriate communication methods.
The globalIndexBitClasses, numGlobalIndexBitsPerLayer, and numGlobalIndexBitLayers arguments specify the communication datalink in each network layer:
globalIndexBitClasses[i] specifies the communication class for layer i, which should be one of custatevecExGlobalIndexBitClass_t values (network topology types, P2P variants, or communicator).
numGlobalIndexBitsPerLayer[i] specifies the number of global index bits assigned to layer i.
numGlobalIndexBitLayers specifies the total number of layers. The max acceptable number is 8.
The sum of all elements in numGlobalIndexBitsPerLayer should equal log2(numProcesses).
The transferWorkspaceSizeInBytes argument specifies the size in bytes of the device buffer allocated for inter-process data transfers. This device buffer serves as workspace during communication between processes. The API may internally adjust the specified value to the closest power-of-two number that is smaller than or equal to the given value. Larger buffer sizes can improve performance depending on the system hardware configuration. If the specified size is too small for optimal performance, the API will automatically increase it to an appropriate value.
The auxConfig argument specifies auxiliary configuration setup. No auxiliary configuration is currently defined; this argument must be NULL.
The capability argument is to enable optional features. The value is specified as a bit-wise OR of custatevecExStateVectorCapability_t. As the present version does not have any capability defined, the value should be 0.
Note
This function creates a logical configuration and does not validate actual system or hardware requirements (e.g., non-existent deviceId, memory sharing method availability, required memory capacity). Validation occurs when calling custatevecExStateVectorCreateMultiProcess() to create the state vector instance, which may return CUSTATEVEC_STATUS_INVALID_CONFIGURATION if the system does not support the requested configuration.
- Parameters:
svConfig – [out] dictionary instance that holds state vector configuration
svDataType – [in] state vector data type
numWires – [in] number of wires of state vector
numDeviceWires – [in] number of wires of state vector on each device
deviceId – [in] device id for this process
memorySharingMethod – [in] method for sharing GPU virtual device memory between processes
globalIndexBitClasses – [in] host pointer to an array of global index bit classes for each layer
numGlobalIndexBitsPerLayer – [in] host pointer to an array specifying number of global index bits per layer
numGlobalIndexBitLayers – [in] number of global index bit layers
transferWorkspaceSizeInBytes – [in] size in bytes of workspace memory for inter-process data transfers
auxConfig – [in] pointer to auxiliary configuration
capability – [in] bit mask to specify optional features of state vector
custatevecExStateVectorCreateSingleProcess#
- custatevecStatus_t custatevecExStateVectorCreateSingleProcess(
- custatevecExStateVectorDescriptor_t *stateVector,
- const custatevecExDictionaryDescriptor_t svConfig,
- cudaStream_t const *streams,
- int32_t numStreams,
- custatevecExResourceManagerDescriptor_t resourceManager
Create state vector.
This function creates a state vector instance according to the svConfig argument and returns the instance to the stateVector argument. The svConfig should be created by custatevecExConfigureStateVectorSingleDevice() or custatevecExConfigureStateVectorMultiDevice().
The streams and numStreams arguments specify CUDA streams. CUDA API calls and kernel launches are serialized on the given streams. The number of streams should match the number of devices where the state vector is allocated. All CUDA calls serialized on the streams are synchronized by calling custatevecExStateVectorSynchronize(). If a null pointer is passed to the streams argument, all calls are serialized on the default streams. In this case, the value of the numStreams argument should be 0.
The resourceManager argument specifies the resource manager. If a null pointer is specified, the library default resource manager is used.
This API returns CUSTATEVEC_STATUS_INVALID_CONFIGURATION if an argument specified by custatevecExConfigureStateVectorSingleDevice() or custatevecExConfigureStateVectorMultiDevice() is invalid, such as a device ID that does not exist.
When using multiple devices (multi-device state vector), the following requirements must be met:
All devices must be of the same GPU generation (same compute capability).
GPUDirect P2P must be available between all devices.
If these requirements are not met, this API returns CUSTATEVEC_STATUS_INVALID_CONFIGURATION.
Note
Custom resource manager is not enabled in this release. Please pass nullptr to the resourceManager argument. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.
- Parameters:
stateVector – [out] a host pointer to the variable that receives state vector instance
svConfig – [in] state vector configuration created by a state vector configuration function
streams – [in] a pointer to a host array that holds CUDA streams
numStreams – [in] the number of streams given by the streams argument
resourceManager – [in] resource manager
custatevecExStateVectorCreateMultiProcess#
- custatevecStatus_t custatevecExStateVectorCreateMultiProcess(
- custatevecExStateVectorDescriptor_t *stateVector,
- const custatevecExDictionaryDescriptor_t svConfig,
- cudaStream_t stream,
- custatevecExCommunicatorDescriptor_t exCommunicator,
- custatevecExResourceManagerDescriptor_t resourceManager
Create multi-process distributed state vector.
This function creates a multi-process distributed state vector instance according to the svConfig argument and returns the instance to the stateVector argument. The svConfig should be created by custatevecExConfigureStateVectorMultiProcess(). The state vector is distributed across multiple processes. Each process owns a single device to allocate one sub state vector.
The stream argument specifies the CUDA stream for this process, corresponding to the fact that one process manages one device. All CUDA calls in this process are serialized on this stream and synchronized by calling custatevecExStateVectorSynchronize(). A null pointer is treated as the CUDA default stream.
The exCommunicator argument specifies a custatevecExCommunicatorDescriptor_t instance that is created by custatevecExCommunicatorCreate().
The resourceManager argument specifies the resource manager.
This API returns CUSTATEVEC_STATUS_INVALID_CONFIGURATION if an argument specified by custatevecExConfigureStateVectorMultiProcess() is invalid, such as a deviceId that does not exist, a memorySharingMethod that is not available on the system, or a number of processes that does not match the configuration.
The number of processes must be a power-of-two number greater than 1. The number of processes must equal (1 << numGlobalBits) where numGlobalBits is the sum of all values in numGlobalIndexBitsPerLayer as specified in custatevecExConfigureStateVectorMultiProcess().
This API returns CUSTATEVEC_STATUS_NOT_SUPPORTED if no CUDA-capable GPU devices are found in the system.
Note
Custom resource manager is not enabled in this release. Please pass nullptr to the resourceManager argument. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.
- Parameters:
stateVector – [out] a host pointer to the variable that receives state vector instance
svConfig – [in] state vector configuration created by custatevecExConfigureStateVectorMultiProcess
stream – [in] CUDA stream for this process
exCommunicator – [in] communicator descriptor for inter-process communication
resourceManager – [in] resource manager
custatevecExStateVectorDestroy#
- custatevecStatus_t custatevecExStateVectorDestroy(
- custatevecExStateVectorDescriptor_t stateVector
Destroy state vector instance.
This function destroys state vector instance.
- Parameters:
stateVector – [in] state vector instance
custatevecExStateVectorGetProperty#
- custatevecStatus_t custatevecExStateVectorGetProperty(
- const custatevecExStateVectorDescriptor_t stateVector,
- custatevecExStateVectorProperty_t property,
- void *value,
- size_t sizeInBytes
Retrieve state vector properties.
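This function retrieves state vector properties. The property argument specifies one of the properties, and the property value is returned to the host buffer specified by the value argument.
The following is the table for properties and corresponding data types.
| property | data type |
| --- | --- |
| CUSTATEVEC_EX_SV_PROP_DISTRIBUTION_TYPE | custatevecExStateVectorDistributionType_t |
| CUSTATEVEC_EX_SV_PROP_DATA_TYPE | cudaDataType_t |
| CUSTATEVEC_EX_SV_PROP_NUM_WIRES | int32_t |
| CUSTATEVEC_EX_SV_PROP_WIRE_ORDERING | int32_t[] |
| CUSTATEVEC_EX_SV_PROP_NUM_LOCAL_WIRES | int32_t |
| CUSTATEVEC_EX_SV_PROP_NUM_DEVICE_SUBSVS | int32_t |
| CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES | int32_t[] |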
Each property enum returns the values as described below.
CUSTATEVEC_EX_SV_PROP_DISTRIBUTION_TYPE returns the distribution type of state vector.
CUSTATEVEC_EX_SV_PROP_DATA_TYPE returns the data type of state vector. The value is CUDA_C_32F or CUDA_C_64F.
CUSTATEVEC_EX_SV_PROP_NUM_WIRES returns the number of wires.
CUSTATEVEC_EX_SV_PROP_WIRE_ORDERING returns the wire ordering of state vector as int32_t array. The length of array is the number of wires.
CUSTATEVEC_EX_SV_PROP_NUM_LOCAL_WIRES returns the number of local wires, that is, the wires local to the sub state vector.
CUSTATEVEC_EX_SV_PROP_NUM_DEVICE_SUBSVS returns the number of sub state vectors placed on device.
CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES returns the array of the sub state vector indices placed on device(s). The array length is the number of the sub state vectors placed on device.
The sizeInBytes argument specifies the byte size of the value buffer and should be equal to or larger than the required byte size. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.
- Parameters:
stateVector – [in] state vector instance
property – [in] a value of custatevecExStateVectorProperty_t
value – [out] host pointer to a host buffer that receives the value of the specified property
sizeInBytes – [in] byte size of the value buffer
custatevecExStateVectorSetMathMode#
- custatevecStatus_t custatevecExStateVectorSetMathMode(
- custatevecExStateVectorDescriptor_t stateVector,
- custatevecMathMode_t mode
Set the compute precision mode for a state vector instance.
This function sets the compute precision mode for the specified state vector instance. Each state vector can have its own compute precision mode independently.
The compute precision mode controls the precision and performance characteristics of mathematical operations performed on the state vector.
The default math mode for the cuStateVec Ex API is CUSTATEVEC_MATH_MODE_ALLOW_FP32_EMULATED_BF16X9. To disable the use of BF16x9 floating point emulation, set the mode to CUSTATEVEC_MATH_MODE_DISALLOW_FP32_EMULATED_BF16X9.
- Parameters:
stateVector – [inout] state vector instance
mode – [in] Compute precision mode as defined by custatevecMathMode_t.
custatevecExStateVectorSetZeroState#
- custatevecStatus_t custatevecExStateVectorSetZeroState(
- custatevecExStateVectorDescriptor_t stateVector
Set the zero state to state vector.
This function sets the zero state (|0000…00>).
- Parameters:
stateVector – [inout] state vector instance
custatevecExStateVectorGetState#
- custatevecStatus_t custatevecExStateVectorGetState(
- const custatevecExStateVectorDescriptor_t stateVector,
- void *state,
- cudaDataType_t dataType,
- custatevecIndex_t begin,
- custatevecIndex_t end,
- int32_t maxNumConcurrentCopies
Copy state vector elements to host buffer.
State vector elements in [begin, end) are copied to the host buffer specified by the state argument. The data type specified by the dataType argument should be equal to that specified when configuring state vector. The host buffer should be large enough to hold copied elements.
State vector elements in the range [begin, end) should be accessible in the current process. For single-device state vector, the whole state vector is allocated in a single process. For multi-device state vector, the state vector is sliced and distributed to multiple devices. However, all state vector elements are accessible in the process. Thus, [begin, end) is in the range of [0, svSize). For multi-process state vectors, the state vector is distributed to multiple processes. Each process has one process-local slice that is called sub-state vector. The range must be within [subSV_index * subSV_size, (subSV_index + 1) * subSV_size), where subSV_index is the sub-state vector index and subSV_size is the sub-state vector size. custatevecExStateVectorGetProperty with CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES returns the sub-state vector indices allocated in the process.
The dataType argument should be identical to that of the state vector instance specified by the stateVector argument. Otherwise, this API returns CUSTATEVEC_STATUS_INVALID_VALUE.
The maxNumConcurrentCopies argument specifies the max number of concurrent copies that can be utilized during the copy of state vector elements. The actual number of concurrent copies is implementation defined.
Note
The call to custatevecExStateVectorGetState() can be asynchronous. Please call custatevecExStateVectorSynchronize() to complete the copying of state vector elements.
- Parameters:
stateVector – [in] state vector instance
state – [out] pointer to a host buffer that receives state vector elements
dataType – [in] dataType of the state vector elements
begin – [in] index of state vector element where the copy begins
end – [in] index of state vector element where the copy ends
maxNumConcurrentCopies – [in] Max number of parallel copies.
custatevecExStateVectorSetState#
- custatevecStatus_t custatevecExStateVectorSetState(
- custatevecExStateVectorDescriptor_t stateVector,
- const void *state,
- cudaDataType_t dataType,
- custatevecIndex_t begin,
- custatevecIndex_t end,
- int32_t maxNumConcurrentCopies
Set complex value array on host to state vector.
Complex values given by the
state argument are copied to the specified state vector. The copy range in the state vector index is [begin, end). The data type of the complex values should be equal to that specified when configuring the state vector. State vector elements in the range [
begin,end) should be accessible in the current process. For single-device state vector, the whole state vector is allocated in a single process. For multi-device state vector, the state vector is sliced and distributed to multiple devices. However, all state vector elements are accessible in the process. Thus, [begin, end) is in the range of [0, svSize). For multi-process state vectors, the state vector is distributed to multiple processes. Each process has one process-local slice that is called sub-state vector. The range must be within: [(subSV_index * subSV_size), (subSV_index + 1) * subSV_size), where subSV_index is the sub-state vector index and subSV_size is the sub-state vector size. custatevecExStateVectorGetProperty with CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES returns the sub-state vector indices allocated in the process.The
maxNumConcurrentCopiesargument specifies the max number of concurrent copies that can be utilized during the copy of state vector elements. The actual number of concurrent copies is implementation defined.Note
The call to custatevecExStateVectorSetState() can be asynchronous. Please call custatevecExStateVectorSynchronize() to complete the copying of state vector elements.
- Parameters:
stateVector – [out] state vector instance
state – [in] pointer to a complex vector on host
dataType – [in] dataType of the state vector elements
begin – [in] index of state vector element where the copy begins
end – [in] index of state vector element where the copy ends
maxNumConcurrentCopies – [in] Max number of parallel copies.
custatevecExStateVectorReassignWireOrdering#
- custatevecStatus_t custatevecExStateVectorReassignWireOrdering(
- custatevecExStateVectorDescriptor_t stateVector,
- const int32_t *wireOrdering,
- int32_t wireOrderingLen
Reassign wire ordering to state vector.
Reassign (overwrite) wire ordering of the given state vector. The elements given by the
wireOrderingargument should be in [0,numWires) wherenumWiresrepresents the number of wires of the state vector. Otherwise this API returns CUSTATEVEC_STATUS_INVALID_VALUE. ThewireOrderingLenargument represents the length of wire ordering. The value should match the number of wires. Otherwise this API returns CUSTATEVEC_STATUS_INVALID_VALUE.- Parameters:
stateVector – [inout] state vector instance
wireOrdering – [in] pointer to a host integer array that holds the wire ordering
wireOrderingLen – [in] the length of wire ordering
custatevecExStateVectorPermuteIndexBits#
- custatevecStatus_t custatevecExStateVectorPermuteIndexBits(
- custatevecExStateVectorDescriptor_t stateVector,
- const int32_t *permutation,
- int32_t permutationLen,
- custatevecExPermutationType_t permutationType
Permute index bits and wires of state vector.
Permutes index bits of state vector, and the wire ordering is accordingly updated to reflect the permuted index bit ordering.
Permutation is specified by the
permutationargument as an integer array. ThepermutationLenargument specifies the length of the permutation. The function can apply two types of permutation, scatter or gather, which is specified by thepermutationTypeargument.The wire ordering is updated as defined in the following formulas.
// When CUSTATEVEC_EX_PERMUTATION_SCATTER specified. dstWires[permutation[idx]] = srcWires[idx] // When CUSTATEVEC_EX_PERMUTATION_GATHER specified. dstWires[idx] = srcWires[permutation[idx]]
By definition, the elements of permutation array should be integers in [0, numWires - 1] where numWires represents the number of wires of the specified state vector instance. If CUSTATEVEC_EX_PERMUTATION_SCATTER is specified to the
permutationType, the permutation length should be identical to the number of wires. If CUSTATEVEC_EX_PERMUTATION_GATHER is specified to thepermutationTypeandpermutationLenis smaller than the number of wires in state vector, wires that don’t appear in thepermutationargument are sorted and appended to the permutation array.Ex. numWires = 5, permutation = {0, 2, 3}, permutationLen = 3. Wires {1, 4} don’t appear. Those two wires are sorted and appended to the permutation. Therefore, the complemented permutation is {0, 2, 3, 1, 4}.
- Parameters:
stateVector – [inout] StateVector instance
permutation – [in] a host pointer to an integer array specifying the permutation
permutationLen – [in] length of the permutation
permutationType – [in] permutation type (scatter or gather)
custatevecExStateVectorGetResourcesFromDeviceSubSV#
- custatevecStatus_t custatevecExStateVectorGetResourcesFromDeviceSubSV(
- custatevecExStateVectorDescriptor_t stateVector,
- int32_t subSVIndex,
- int32_t *deviceId,
- void **d_subSV,
- cudaStream_t *stream,
- custatevecHandle_t *handle
Get resources from device sub state vector.
Get the computing resource associated with the device state vector. The
subSVIndex argument specifies the sub state vector index. The deviceId and d_subSV arguments return the device id and the device memory pointer of the specified sub state vector. The stream and handle arguments return the CUDA stream and the cuStateVec handle. All CUDA calls in custatevecEx API are serialized on the returned stream. The returned cuStateVec handle can be passed to cuStateVec APIs to operate on the returned device pointer. The number of device sub state vectors is retrieved by calling the custatevecExStateVectorGetProperty() API with CUSTATEVEC_EX_SV_PROP_NUM_DEVICE_SUBSVS specified as the property name, and the sub state vector indices are retrieved by specifying CUSTATEVEC_EX_SV_PROP_DEVICE_SUBSV_INDICES. A single-device state vector always has one device sub-state vector.
The
deviceId,d_subSV, andstreamarguments must not be null pointers. Otherwise, this API returns CUSTATEVEC_STATUS_INVALID_VALUE. A null pointer can be passed to thehandleargument. For this case, the cuStateVec handle will not be returned.- Parameters:
stateVector – [in] StateVector instance
subSVIndex – [in] sub state vector index
deviceId – [out] device id
d_subSV – [out] a host pointer that receives the specified sub state vector pointer.
stream – [out] a host pointer to CUDA stream
handle – [out] a host pointer to custatevecHandle_t
custatevecExStateVectorGetResourcesFromDeviceSubSVView#
- custatevecStatus_t custatevecExStateVectorGetResourcesFromDeviceSubSVView(
- const custatevecExStateVectorDescriptor_t stateVector,
- int32_t subSVIndex,
- int32_t *deviceId,
- const void **d_subSV,
- cudaStream_t *stream,
- custatevecHandle_t *handle
Get resources from device sub state vector view.
This API works almost the same as custatevecExStateVectorGetResourcesFromDeviceSubSV() except that the returned device pointer is immutable.
Please refer to the documentation of custatevecExStateVectorGetResourcesFromDeviceSubSV() API for the descriptions of the arguments.
- Parameters:
stateVector – [in] StateVector instance
subSVIndex – [in] sub state vector index
deviceId – [out] device id
d_subSV – [out] a host pointer that receives the specified sub state vector pointer.
stream – [out] a host pointer to CUDA stream
handle – [out] a host pointer to custatevecHandle_t
custatevecExStateVectorSynchronize#
- custatevecStatus_t custatevecExStateVectorSynchronize(
- custatevecExStateVectorDescriptor_t stateVector
Flush all operations and synchronize.
This function flushes all operations issued before the call of this function and synchronizes all streams associated with the specified state vector instance. For multi-process state vectors, a barrier among processes will be issued in addition to the synchronization in the current process.
- Parameters:
stateVector – [in] state vector instance
Simulator API#
custatevecExAbs2SumArray#
- custatevecStatus_t custatevecExAbs2SumArray(
- custatevecExStateVectorDescriptor_t stateVector,
- double *abs2sum,
- const int32_t *outputOrdering,
- int32_t outputOrderingLen,
- const int32_t *maskBitString,
- const int32_t *maskWireOrdering,
- int32_t maskLen
Calculate abs2sum array for a given set of wires.
Calculates an array of sums of squared absolute values of state vector elements. The abs2sum array can be on host or device. The tensor ordering of the abs2sum array is specified by the
outputOrderingand theoutputOrderingLenarguments. Unspecified wires are folded (summed up).The
maskBitString,maskWireOrderingandmaskLenarguments set a bit string to mask the state vector. The abs2sum array is calculated by using state vector elements that match the mask bit string. If themaskLenargument is 0, null pointers can be specified to themaskBitStringandmaskWireOrderingarguments, and all state vector elements are used for calculation.By definition, all values in
outputOrderingandmaskWireOrderingarguments should be in [0, numWires). TheoutputOrderingandmaskWireOrderingarguments should not contain overlapping or duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.The empty
outputOrderingcan be specified to calculate the norm of state vector. In this case, 0 is passed to theoutputOrderingLenargument and theoutputOrderingargument can be a null pointer.For distributed state vector configurations, the
abs2sumargument should point to an array on the host.Note
Since the size of abs2sum array is proportional to \( 2^{outputOrderingLen} \) , the max length of
outputOrderingdepends on the amount of available memory andmaskLen.- Parameters:
stateVector – [in] StateVector instance
abs2sum – [out] pointer to a host or device array of sums of squared absolute values
outputOrdering – [in] pointer to a host array of output tensor ordering
outputOrderingLen – [in] the length of outputOrdering
maskBitString – [in] pointer to a host array for a bit string to specify mask bits
maskWireOrdering – [in] pointer to a host array that specifies the wire ordering of maskBitString
maskLen – [in] the length of mask
custatevecExMeasure#
- custatevecStatus_t custatevecExMeasure(
- custatevecExStateVectorDescriptor_t stateVector,
- custatevecIndex_t *bitString,
- const int32_t *bitStringOrdering,
- int32_t bitStringOrderingLen,
- double randnum,
- custatevecCollapseOp_t collapse,
- const void *reserved
Perform qubit measurements.
This function executes multiple single qubit measurements with a single call and returns a bit string that represents the measurement outcomes. The
bitStringOrderingargument specifies wires to be measured.The measurement result is stored in
bitStringas a 64-bit integer bit mask. The ordering ofbitStringis specified by thebitStringOrderingandbitStringOrderingLenarguments. The idx-th bit ofbitStringcorresponds to the measurement outcome of bitStringOrdering[idx].By definition, all values in
bitStringOrderingarguments should be in [0, numWires). ThebitStringOrderingargument should not contain duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.The
collapse argument specifies the operation applied to the state vector. If CUSTATEVEC_COLLAPSE_NONE is specified, this function only returns the measured bit string without modifying the state vector.
If CUSTATEVEC_COLLAPSE_NORMALIZE_AND_ZERO is specified, this function collapses the state vector.
If CUSTATEVEC_COLLAPSE_RESET is specified, the state vector is collapsed as CUSTATEVEC_COLLAPSE_NORMALIZE_AND_ZERO does. Then, the measurement outcome is checked. If the measurement outcome for a specified wire is |1>, the wire is flipped (reset) to |0>. Otherwise, the state vector is not modified.
If a random number is not in [0, 1), this function returns CUSTATEVEC_STATUS_INVALID_VALUE. At least one wire should be specified, otherwise this function returns CUSTATEVEC_STATUS_INVALID_VALUE.
- Parameters:
stateVector – [inout] StateVector instance (modified unless CUSTATEVEC_COLLAPSE_NONE is specified)
bitString – [out] host pointer that receives the measured bit string
bitStringOrdering – [in] pointer to a host array of bit string ordering
bitStringOrderingLen – [in] length of bitStringOrdering
randnum – [in] random number, [0, 1).
collapse – [in] Collapse operation
reserved – [in] Reserved argument. A null pointer should be passed.
custatevecExSample#
- custatevecStatus_t custatevecExSample(
- custatevecExStateVectorDescriptor_t stateVector,
- custatevecIndex_t *bitStrings,
- const int32_t *bitStringOrdering,
- int32_t bitStringOrderingLen,
- const double *randnums,
- int32_t numShots,
- custatevecSamplerOutput_t output,
- const void *reserved
Sample bit strings from the state vector.
This function performs sampling. The
bitStringOrderingandbitStringOrderingLenarguments specify wires to be sampled. Sampled bit strings are represented as an array of custatevecIndex_t and are stored to the host memory buffer that thebitStringsargument points to.By definition, all values in
bitStringOrderingarguments should be in [0, numWires). ThebitStringOrderingargument should not contain duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.The
randnums argument is an array of user-generated random numbers whose length is numShots. Each random number should be in [0, 1). A random number given by the randnums argument is clipped to [0, 1) if it is outside that range. The
outputargument specifies the order of sampled bit strings:If CUSTATEVEC_SAMPLER_OUTPUT_RANDNUM_ORDER is specified, the order of sampled bit strings is the same as that in the
randnumsargument.If CUSTATEVEC_SAMPLER_OUTPUT_ASCENDING_ORDER is specified, bit strings are returned in the ascending order.
- Parameters:
stateVector – [in] State vector instance
bitStrings – [out] pointer to a host array to store sampled bit strings
bitStringOrdering – [in] pointer to a host array of bit string ordering for sampling
bitStringOrderingLen – [in] length of bitStringOrdering
randnums – [in] pointer to an array of random numbers
numShots – [in] the number of shots
output – [in] the order of sampled bit strings
reserved – [in] Reserved argument. A null pointer should be passed.
custatevecExApplyMatrix#
- custatevecStatus_t custatevecExApplyMatrix(
- custatevecExStateVectorDescriptor_t stateVector,
- const void *matrix,
- cudaDataType_t matrixDataType,
- custatevecExMatrixType_t exMatrixType,
- custatevecMatrixLayout_t layout,
- int32_t adjoint,
- const int32_t *targets,
- int32_t numTargets,
- const int32_t *controls,
- const int32_t *controlBitValues,
- int32_t numControls
Apply gate matrix.
Apply a gate matrix to the state vector.
The
matrixargument is a host or device pointer that points to matrix element array. For distributed state vector configurations, thematrixargument should be a host pointer. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE. ThematrixDataTypeargument specifies the data type of matrix elements. The value should be CUDA_C_32F or CUDA_C_64F for complex 64 or complex 128 type, respectively.The
exMatrixTypeargument specifies the matrix types, dense (square), diagonal or anti-diagonal. The memory layout of the matrix is row- or column-major, which is specified by thelayoutargument.The matrix type is dense (square) if CUSTATEVEC_EX_MATRIX_DENSE is specified. The
matrixargument points to a buffer that holds dense matrix as two-dimensional array. The matrix dimension is ( \(2^\text{numTargets} \times 2^\text{numTargets}\) ). The memory layout follows the specification of thelayoutargument. The layout is row-major if CUSTATEVEC_MATRIX_LAYOUT_ROW, or column-major if CUSTATEVEC_MATRIX_LAYOUT_COL is specified.The matrix type is diagonal if CUSTATEVEC_EX_MATRIX_DIAGONAL is specified. The
matrixargument points to a buffer that accommodates a complex vector of diagonal elements. The vector length is identical to the matrix dimension, ( \(2^\text{numTargets}\) ). The memory layout of the diagonal elements is identical for row- and column-major layouts. Thus, the memory layout specification by thelayoutargument is ignored.The matrix type is anti-diagonal if CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL is specified. The
matrixargument points to a buffer that accommodates a complex vector of anti-diagonal elements. The vector length is identical to the matrix dimension, ( \(2^\text{numTargets}\) ). The memory layout is specified by thelayoutargument expressed as shown below,elements[idx] = matrix(idx, dim - (idx + 1)) // row-major layout elements[idx] = matrix(dim - (idx + 1), idx) // col-major layout
by using elements[idx] for the idx-th anti-diagonal element and matrix(row, col) for the matrix element at (row, col).
The
targetsandcontrolsarguments specify target and control wires in the state vector. ThecontrolBitValuesargument specifies bit values of control wires. The ordering ofcontrolBitValuesis specified by thecontrolsargument. If a null pointer is specified to this argument, all control bit values are set to 1.By definition, all target and control values should be in [0, numWires). Wires in
targetsandcontrolsarguments should not overlap or have duplicates. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.For distributed state vector configurations, the
numTargetsargument should be equal to or less thannumDeviceWireswhenexMatrixTypeis CUSTATEVEC_EX_MATRIX_DENSE or CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL. If a larger matrix is passed, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.- Parameters:
stateVector – [inout] state vector instance
matrix – [in] pointer to a buffer that holds matrix elements
matrixDataType – [in] data type of matrix
exMatrixType – [in] enumerator specifying the matrix type
layout – [in] enumerator specifying the matrix layout
adjoint – [in] apply adjoint of matrix
targets – [in] pointer to a host array of target wires
numTargets – [in] the number of target wires
controls – [in] pointer to a host array of control wires
controlBitValues – [in] pointer to a host array of control bit values
numControls – [in] the number of control wires
custatevecExApplyPauliRotation#
- custatevecStatus_t custatevecExApplyPauliRotation(
- custatevecExStateVectorDescriptor_t stateVector,
- double theta,
- const custatevecPauli_t *paulis,
- const int32_t *targets,
- int32_t numTargets,
- const int32_t *controls,
- const int32_t *controlBitValues,
- int32_t numControls
Apply the exponential of a multi-qubit Pauli operator.
Apply exponential of a tensor product of Pauli operators, \( e^{i \theta P} \), where \(P\) is the tensor product \(P = paulis[0] \otimes paulis[1] \otimes \cdots \otimes paulis[numTargets-1]\) acting on the wires specified by the
targetsargument. ThepaulisandnumTargetsarguments specify the Pauli operators and their count.At least one target and a corresponding Pauli basis should be specified.
The
controlsandnumControlsarguments specify the control wires in the state vector.By definition, all target and control values should be in [0, numWires). The
targetsandcontrolsarguments should not contain overlapping or duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.The
controlBitValuesargument specifies bit values of control wires. The ordering ofcontrolBitValuesis specified by thecontrolsargument. If a null pointer is specified to this argument, all control bit values are set to 1.- Parameters:
stateVector – [inout] state vector instance
theta – [in] theta
paulis – [in] host pointer to custatevecPauli_t array
targets – [in] pointer to a host array of target wires
numTargets – [in] the number of target wires
controls – [in] pointer to a host array of control wires
controlBitValues – [in] pointer to a host array of control bit values
numControls – [in] the number of control wires
custatevecExComputeExpectationOnPauliBasis#
- custatevecStatus_t custatevecExComputeExpectationOnPauliBasis(
- custatevecExStateVectorDescriptor_t stateVector,
- double *expectationValues,
- const custatevecPauli_t **pauliOperatorArrays,
- int32_t numPauliOperatorArrays,
- const int32_t **basisWiresArray,
- const int32_t *numBasisWiresArray
Compute expectation values for a batch of (multi-qubit) Pauli operators.
This function computes multiple expectation values for given sequences of Pauli operators by a single call.
A single Pauli operator sequence, pauliOperators, is represented by using an array of custatevecPauli_t. The basis wires on which these Pauli operators are acting are represented by an array of wires.
The length of pauliOperators and basisWires are the same and specified by numBasisWires.
The number of Pauli operator sequences is specified by the
numPauliOperatorArraysargument.Multiple sequences of Pauli operators are represented in the form of arrays of arrays in the following manners:
The
pauliOperatorArraysargument is an array of arrays of custatevecPauli_t.The
basisWiresArrayis an array of the wire arrays.The
numBasisWiresArrayargument holds an array of the length of Pauli operator sequences and wire arrays.
By definition, all wires in each array of
basisWiresArrayarguments should be in [0, numWires). Each array inbasisWiresArrayshould not contain duplicate wires. This function returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.Computed expectation values are stored in a host buffer specified by the
expectationValues argument of length numPauliOperatorArrays. This function returns CUSTATEVEC_STATUS_INVALID_VALUE if the wires specified for a Pauli operator sequence have duplicates and/or point to a wire that does not exist.
This function accepts empty Pauli operator sequence to get the norm of the state vector.
- Parameters:
stateVector – [in] state vector instance
expectationValues – [out] pointer to a host array to store expectation values
pauliOperatorArrays – [in] pointer to a host array of Pauli operator arrays
numPauliOperatorArrays – [in] the number of Pauli operator arrays
basisWiresArray – [in] host array of basis wire arrays
numBasisWiresArray – [in] host array of the number of basis wires
SVUpdater API#
custatevecExConfigureSVUpdater#
- custatevecStatus_t custatevecExConfigureSVUpdater(
- custatevecExDictionaryDescriptor_t *svUpdaterConfig,
- cudaDataType_t dataType,
- const custatevecExSVUpdaterConfigItem_t *configItems,
- int32_t numConfigItems
Create SVUpdater configuration.
This function creates the configuration dictionary for SVUpdater. The
dataType argument specifies the value type internally used in SVUpdater. Its value should be CUDA_C_32F or CUDA_C_64F. The configItems and numConfigItems arguments specify an array of custatevecExSVUpdaterConfigItem_t and its length. The following configuration items are available:
CUSTATEVEC_EX_SVUPDATER_CONFIG_MAX_NUM_HOST_THREADS
Type: int32_t, acceptable range: [1, 32]
Description: The maximum number of host threads utilized during the call to custatevecExSVUpdaterApply().
CUSTATEVEC_EX_SVUPDATER_CONFIG_DENSE_FUSION_SIZE
Type: int32_t, acceptable range: [1, 10]
Description: The maximum number of targets for fused dense gate matrices.
CUSTATEVEC_EX_SVUPDATER_CONFIG_DIAGONAL_FUSION_SIZE
Type: int32_t, acceptable range: [0, 20]
Description: The maximum number of targets for fused diagonal gate matrices. If 0 is specified, diagonal gate matrices are not fused.
The
configItemsshould not contain duplicate items with the same name. For configurations that are not specified, system default values will be used.The
configItemscan be a null pointer. In that case,numConfigItemsshould be set to 0.It’s the user’s responsibility to destroy the created svUpdaterConfig object by calling custatevecExDictionaryDestroy().
Note
Default configuration is recommended for optimal performance. For CUSTATEVEC_EX_SVUPDATER_CONFIG_MAX_NUM_HOST_THREADS, values between 4 and 16 generally provide good performance. For CUSTATEVEC_EX_SVUPDATER_CONFIG_DENSE_FUSION_SIZE, values between 4 and 6 are automatically selected by the library. These values correspond to the upper limit where gate application kernels remain memory-bound rather than compute-bound, ensuring optimal performance. For CUSTATEVEC_EX_SVUPDATER_CONFIG_DIAGONAL_FUSION_SIZE, the library default is recommended. The value is automatically adjusted based on the number of qubits in the state vector.
- Parameters:
svUpdaterConfig – [out] SVUpdater configuration
dataType – [in] data type used in SVUpdater
configItems – [in] host pointer to the array of Configuration items
numConfigItems – [in] the number of configuration items
custatevecExSVUpdaterCreate#
- custatevecStatus_t custatevecExSVUpdaterCreate(
- custatevecExSVUpdaterDescriptor_t *svUpdater,
- const custatevecExDictionaryDescriptor_t svUpdaterConfig,
- custatevecExResourceManagerDescriptor_t resourceManager
Create SVUpdater.
This function creates SVUpdater instance.
Note
Custom resource manager is not enabled in this release. Please pass nullptr to the
resourceManagerargument. Otherwise, this function returns CUSTATEVEC_STATUS_INVALID_VALUE.- Parameters:
svUpdater – [out] created SVUpdater
svUpdaterConfig – [in] SVUpdater configuration
resourceManager – [in] resource manager
custatevecExSVUpdaterDestroy#
- custatevecStatus_t custatevecExSVUpdaterDestroy(
- custatevecExSVUpdaterDescriptor_t svUpdater
Destroy SVUpdater instance.
This function destroys SVUpdater instance.
- Parameters:
svUpdater – [in] SVUpdater instance
custatevecExSVUpdaterClear#
- custatevecStatus_t custatevecExSVUpdaterClear(
- custatevecExSVUpdaterDescriptor_t svUpdater
Clear operations queued in SVUpdater.
This function clears queued operations in SVUpdater.
- Parameters:
svUpdater – [inout] SVUpdater instance
custatevecExSVUpdaterEnqueueMatrix#
- custatevecStatus_t custatevecExSVUpdaterEnqueueMatrix(
- custatevecExSVUpdaterDescriptor_t svUpdater,
- const void *matrix,
- cudaDataType_t matrixDataType,
- custatevecExMatrixType_t exMatrixType,
- custatevecMatrixLayout_t layout,
- int32_t adjoint,
- const int32_t *targets,
- int32_t numTargets,
- const int32_t *controls,
- const int32_t *controlBitValues,
- int32_t numControls
Enqueue matrix to SVUpdater.
This function enqueues a gate matrix to SVUpdater.
The
matrix argument is a host pointer that points to the matrix element array. The matrixDataType argument specifies the data type of matrix elements. The value should be CUDA_C_32F or CUDA_C_64F for complex 64 or complex 128 type, respectively. Only CUDA_C_64F is acceptable if the SVUpdater data type was set to CUDA_C_64F when calling custatevecExConfigureSVUpdater(). Both CUDA_C_64F and CUDA_C_32F are acceptable if the SVUpdater data type is CUDA_C_32F. The
exMatrixTypeargument specifies the matrix types, dense (square), diagonal or anti-diagonal. The memory layout of the matrix is row- or column-major, which is specified by thelayoutargument.The matrix type is dense (square) if CUSTATEVEC_EX_MATRIX_DENSE is specified. The
matrixargument points to a buffer that holds dense matrix as two-dimensional array. The matrix dimension is ( \(2^\text{numTargets} \times 2^\text{numTargets}\) ). The memory layout follows the specification of thelayoutargument. The layout is row-major if CUSTATEVEC_MATRIX_LAYOUT_ROW, or column-major if CUSTATEVEC_MATRIX_LAYOUT_COL is specified.The matrix type is diagonal if CUSTATEVEC_EX_MATRIX_DIAGONAL is specified. The
matrixargument points to a buffer that accommodates a complex vector of diagonal elements. The vector length is identical to the matrix dimension, ( \(2^\text{numTargets}\) ). The memory layout of the diagonal elements is identical for row- and column-major layouts. Thus, the memory layout specification by thelayoutargument is ignored.The matrix type is anti-diagonal if CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL is specified. The
matrixargument points to a buffer that accommodates a complex vector of anti-diagonal elements. The vector length is identical to the matrix dimension, ( \(2^\text{numTargets}\) ). The memory layout is specified by thelayoutargument expressed as shown below,elements[idx] = matrix(idx, dim - (idx + 1)) // row-major layout elements[idx] = matrix(dim - (idx + 1), idx) // col-major layout
by using elements[idx] for the idx-th anti-diagonal element and matrix(row, col) for the matrix element at (row, col).
The
targets and controls arguments specify target and control wires in the state vector. The controlBitValues argument specifies bit values of control wires. The ordering of controlBitValues is specified by the controls argument. If a null pointer is passed for controlBitValues, all control bit values are set to 1. The max number of targets is limited to 15 if CUSTATEVEC_EX_MATRIX_DENSE or CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL is specified, and to 30 if CUSTATEVEC_EX_MATRIX_DIAGONAL is specified. However, using 10 or more targets is not recommended; the recommended number of targets for typical usage is 6 or fewer.
By definition, all target and control values should be in [0, numWires). The
targetsandcontrolsarguments should not contain overlapping wires.For distributed state vector configurations, the total number of wires involved (
numTargets+numControls) should not exceednumDeviceWiresor the number of local wires on each device. Note that this constraint is not validated by custatevecExSVUpdaterEnqueueMatrix(). The validation occurs later during the call to custatevecExSVUpdaterApply() where the StateVector instance is provided, and CUSTATEVEC_STATUS_INVALID_VALUE is returned if the constraint is violated.- Parameters:
svUpdater – [inout] SVUpdater instance
matrix – [in] pointer to a host buffer that holds matrix elements
matrixDataType – [in] data type of matrix
exMatrixType – [in] enumerator specifying the matrix type
layout – [in] enumerator specifying the matrix layout
adjoint – [in] apply adjoint of matrix
targets – [in] pointer to a host array of target wires
numTargets – [in] the number of target wires
controls – [in] pointer to a host array of control wires
controlBitValues – [in] pointer to a host array of control bit values
numControls – [in] the number of control wires
custatevecExSVUpdaterEnqueueUnitaryChannel#
- custatevecStatus_t custatevecExSVUpdaterEnqueueUnitaryChannel(
- custatevecExSVUpdaterDescriptor_t svUpdater,
- const void *const *unitaries,
- cudaDataType_t unitariesDataType,
- const custatevecExMatrixType_t *exMatrixTypes,
- int32_t numUnitaries,
- custatevecMatrixLayout_t layout,
- const double *probabilities,
- const int32_t *channelWires,
- int32_t numChannelWires
Enqueue mixed unitary channel.
This function enqueues a mixed unitary channel to SVUpdater.
The
unitariesargument is a host pointer to an array of unitary matrices. Each array element in the unitaries argument points to an array of matrix elements that represents a unitary matrix. The actual data type of matrix elements is specified by theunitariesDataTypeargument. TheexMatrixTypesis a host array that specifies the matrix types of unitary matrices. The matrix types specified byexMatrixTypesdetermine the lengths of matrix element arrays specified by theunitariesargument. Thelayoutargument specifies the matrix layout in the same way as described in custatevecExSVUpdaterEnqueueMatrix().The
probabilitiesargument is a host array that represents the probabilities associated with the specified unitary matrices to be randomly sampled. The array length isnumUnitaries. The total sum of all probabilities must be less than or equal to 1.0. When the sum is less than 1.0, the remaining probability corresponds to no transformation, implementing a probabilistic quantum channel where the state vector remains unchanged with probability (1 - sum_of_probabilities). The probabilities are used as-is for sampling without normalization in this API.If any probability value in the
probabilities array is zero or negative, CUSTATEVEC_STATUS_INVALID_VALUE is returned. Only CUDA_C_64F is acceptable for unitariesDataType if the SVUpdater data type was set to CUDA_C_64F by calling custatevecExConfigureSVUpdater(). Either CUDA_C_64F or CUDA_C_32F is acceptable if the SVUpdater data type is CUDA_C_32F.
The
channelWires and numChannelWires arguments specify the wires on which the sampled unitary matrix is to be applied. The max number of wires is limited to 15 if CUSTATEVEC_EX_MATRIX_DENSE or CUSTATEVEC_EX_MATRIX_ANTI_DIAGONAL is specified, and to 30 if CUSTATEVEC_EX_MATRIX_DIAGONAL is specified. However, using 10 or more wires is not recommended; the recommended number of wires for typical usage is 6 or fewer. For distributed state vector configurations, the
numChannelWires argument should be less than or equal to numDeviceWires, that is, the number of local wires on each device. Note that this constraint is not validated by custatevecExSVUpdaterEnqueueUnitaryChannel(). The validation occurs later during the call to custatevecExSVUpdaterApply() where the StateVector instance is provided, and CUSTATEVEC_STATUS_INVALID_VALUE is returned if the constraint is violated. By definition, all wires in
the channelWires argument should be in [0, numWires). custatevecExSVUpdaterApply() returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied.
- Parameters:
svUpdater – [inout] SVUpdater instance
unitaries – [in] host pointer to an array of unitary matrix elements
unitariesDataType – [in] dataType of the specified unitary matrices
exMatrixTypes – [in] matrix types of the specified unitary matrices
numUnitaries – [in] the number of matrices.
layout – [in] layout of the specified unitary matrices
probabilities – [in] host array that holds the probabilities
channelWires – [in] wires on which the sampled unitary matrix is applied
numChannelWires – [in] the number of wires
custatevecExSVUpdaterEnqueueGeneralChannel#
- custatevecStatus_t custatevecExSVUpdaterEnqueueGeneralChannel(
- custatevecExSVUpdaterDescriptor_t svUpdater,
- const void *const *matrices,
- cudaDataType_t matrixDataType,
- const custatevecExMatrixType_t *exMatrixTypes,
- int32_t numMatrices,
- custatevecMatrixLayout_t layout,
- const int32_t *channelWires,
- int32_t numChannelWires
Enqueue general channel.
This function enqueues a general channel to SVUpdater.
The
matrices argument is a host pointer to an array of matrices that are assumed to be Kraus operators defining the general quantum channel. Each array element in the matrices argument points to an array of matrix elements that represents a matrix. The actual data type of matrix elements is specified by the matrixDataType argument. The numMatrices argument specifies the number of matrices. The exMatrixTypes argument is a host array that specifies the matrix types of the matrices. The matrix types specified by exMatrixTypes determine the lengths of matrix element arrays specified by the matrices argument. The layout argument specifies the matrix layout in the same way as described in custatevecExSVUpdaterEnqueueMatrix(). If a general channel is queued to SVUpdater, the state vector will be normalized, and its norm will be 1 after the call to custatevecExSVUpdaterApply(). Only CUDA_C_64F is acceptable for matrixDataType if the SVUpdater data type was set to CUDA_C_64F by calling custatevecExConfigureSVUpdater(). Either CUDA_C_64F or CUDA_C_32F is acceptable if the SVUpdater data type is CUDA_C_32F.
The
channelWires and numChannelWires arguments specify the wires on which the general channel is to be applied. The max number of wires is limited to 15 for all matrix types. However, using 10 or more wires is not recommended; the recommended number of wires for typical usage is 6 or fewer. For distributed state vector configurations, the
numChannelWires argument should be less than or equal to numDeviceWires, that is, the number of local wires on each device. Note that this constraint is not validated by custatevecExSVUpdaterEnqueueGeneralChannel(). The validation occurs later during the call to custatevecExSVUpdaterApply() where the StateVector instance is provided, and CUSTATEVEC_STATUS_INVALID_VALUE is returned if the constraint is violated. By definition, all wires in
the channelWires argument should be in [0, numWires). custatevecExSVUpdaterApply() returns CUSTATEVEC_STATUS_INVALID_WIRE if the above requirements are not satisfied. It is the user’s responsibility to provide a complete set of Kraus operators. The completeness of Kraus operators is validated immediately during custatevecExSVUpdaterEnqueueGeneralChannel() by computing the minimum expectation value using eigenvalues of the Kraus operators. If an incomplete set of Kraus operators is provided, custatevecExSVUpdaterEnqueueGeneralChannel() returns CUSTATEVEC_STATUS_NUMERICAL_ERROR.
- Parameters:
svUpdater – [inout] SVUpdater instance
matrices – [in] host pointer to an array of matrix elements
matrixDataType – [in] dataType of the matrices
exMatrixTypes – [in] matrix types of the specified matrices
numMatrices – [in] the number of matrices.
layout – [in] layout of the matrices
channelWires – [in] wires on which the general channel is applied
numChannelWires – [in] the number of wires
custatevecExSVUpdaterGetMaxNumRequiredRandnums#
- custatevecStatus_t custatevecExSVUpdaterGetMaxNumRequiredRandnums(
- custatevecExSVUpdaterDescriptor_t svUpdater,
- int32_t *maxNumRequiredRandnums
Get the max number of required random numbers.
Get the maximum number of random numbers required to call custatevecExSVUpdaterApply().
- Parameters:
svUpdater – [in] SVUpdater instance
maxNumRequiredRandnums – [out] the max required number of random numbers.
custatevecExSVUpdaterApply#
- custatevecStatus_t custatevecExSVUpdaterApply(
- custatevecExSVUpdaterDescriptor_t svUpdater,
- custatevecExStateVectorDescriptor_t stateVector,
- const double *randnums,
- int32_t numRandnums
Apply queued operations.
Apply operations queued in SVUpdater to the specified state vector.
The data type of state vector should be identical to that of SVUpdater specified by calling custatevecExConfigureSVUpdater(). Otherwise, this API returns CUSTATEVEC_STATUS_INVALID_VALUE. The
randnums and numRandnums arguments pass the array of random numbers that is utilized to apply noise channels. Each random number in the randnums array must be in the range [0, 1). The number of required random numbers is retrieved by calling custatevecExSVUpdaterGetMaxNumRequiredRandnums(). It is the user’s responsibility to generate the required number of random numbers in the valid range. If a given random number is not in the range, the value is clipped.
This function performs validation of queued operations against the provided state vector and may return the following error codes:
CUSTATEVEC_STATUS_INVALID_WIRE: This error is returned if any queued operation references wire indices that do not exist in the provided state vector. All wires from enqueued operations (targets, controls, and channelWires) are validated against the wire range [0, numWires) of the specified state vector instance during the call to this API.
CUSTATEVEC_STATUS_INVALID_VALUE: For distributed state vector configurations, this error is returned if any queued operation has a total number of wires (targets + controls for matrices, or channelWires for channels) that exceeds the number of local wires on each device as described in the documentation of custatevecExSVUpdaterEnqueueMatrix(), custatevecExSVUpdaterEnqueueUnitaryChannel(), and custatevecExSVUpdaterEnqueueGeneralChannel(). This check is performed here because the StateVector instance is available only at this point.
CUSTATEVEC_STATUS_NUMERICAL_ERROR: This error can occur in two scenarios: (1) When the state vector norm becomes close to zero during the application of operations, making normalization impossible. (2) For general channels, if the completeness of Kraus operators is violated as determined by computing expectation values during the application process.
- Parameters:
svUpdater – [inout] SVUpdater instance
stateVector – [inout] state vector instance
randnums – [in] a host pointer to an array of random numbers in the range [0, 1)
numRandnums – [in] the number of random numbers