cuSOLVERMp C API#
Library Management#
cusolverMpCreate
#
cusolverStatus_t cusolverMpCreate(
cusolverMpHandle_t *handle,
int device,
cudaStream_t stream)
`device` and the CUDA stream `stream`. `stream` cannot be the default (`NULL` or `0`) stream.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
Out |
cuSOLVERMp library handle |
device |
Host |
In |
Device that will be assigned to the handle. |
stream |
Host |
In |
Stream that will be assigned to the handle. |
cusolverMpDestroy
#
cusolverStatus_t cusolverMpDestroy(
cusolverMpHandle_t handle)
`device`. Only one handle per process and per GPU is supported.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In/Out |
cuSOLVERMp library handle |
cusolverMpGetStream
#
cusolverStatus_t cusolverMpGetStream(
cusolverMpHandle_t handle,
cudaStream_t *stream)
`stream` associated with the handle.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle |
stream |
Host |
Out |
Stream associated with the handle. |
cusolverMpGetVersion
#
cusolverStatus_t cusolverMpGetVersion(
cusolverMpHandle_t handle,
int *version)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle |
version |
Host |
Out |
cuSOLVERMp library version. Value is |
Grid Management#
cusolverMpCreateDeviceGrid
#
cusolverStatus_t cusolverMpCreateDeviceGrid(
cusolverMpHandle_t handle,
cusolverMpGrid_t *grid,
cal_comm_t comm,
int32_t numRowDevices,
int32_t numColDevices,
cusolverMpGridMapping_t mapping)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle |
grid |
Host |
Out |
Grid object to be initialized. |
comm |
Host |
In |
Communicator that will be associated with the grid. |
numRowDevices |
Host |
In |
Number of process rows the grid will contain. |
numColDevices |
Host |
In |
Number of process columns the grid will contain. |
mapping |
Host |
In |
How to map processes to the grid. See description of cusolverMpGridMapping_t for further details. Currently, only |
cusolverMpDestroyGrid
#
cusolverStatus_t cusolverMpDestroyGrid(
cusolverMpGrid_t grid)
grid
object.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
grid |
Host |
In/Out |
Grid object to be destroyed. |
Matrix Management#
cusolverMpCreateMatrixDesc
#
cusolverStatus_t cusolverMpCreateMatrixDesc(
cusolverMpMatrixDescriptor_t *descr,
cusolverMpGrid_t grid,
cudaDataType dataType,
int64_t M_A,
int64_t N_A,
int64_t MB_A,
int64_t NB_A,
uint32_t RSRC_A,
uint32_t CSRC_A,
int64_t LLD_A)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
descr |
Host |
Out |
Matrix descriptor object initialized by this function. |
dataType |
Host |
In |
Data type of the matrix A. |
M_A |
Host |
In |
Number of rows in the global matrix A. |
N_A |
Host |
In |
Number of columns in the global matrix A. |
MB_A |
Host |
In |
Blocking factor used to distribute the rows of the global matrix A. |
NB_A |
Host |
In |
Blocking factor used to distribute the columns of the global matrix A. |
RSRC_A |
Host |
In |
Process row over which the first row of the matrix A is distributed. Only the value of |
CSRC_A |
Host |
In |
Process column over which the first column of the matrix A is distributed. Only the value of |
LLD_A |
Host |
In |
Leading dimension of the local matrix. |
dataType
argument are listed.Data Type of A |
Description |
---|---|
CUDA_R_32I |
32-bit integer values. |
CUDA_R_64I |
64-bit integer values. |
CUDA_R_32F |
Single precision real values. |
CUDA_R_64F |
Double precision real values. |
CUDA_C_32F |
Single precision complex values. |
CUDA_C_64F |
Double precision complex values. |
cusolverMpDestroyMatrixDesc
#
cusolverStatus_t cusolverMpDestroyMatrixDesc(
cusolverMpMatrixDescriptor_t descr )
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
descr |
Host |
In/Out |
Matrix descriptor object destroyed by this function. |
Utility#
cusolverMpNUMROC
#
int64_t cusolverMpNUMROC(
int64_t n,
int64_t nb,
uint32_t iproc,
uint32_t isrcproc,
uint32_t nprocs)
iproc
argument.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
n |
Host |
In |
Number of rows or columns in the global distributed matrix. |
nb |
Host |
In |
Row or column blocking size of the global matrix. |
iproc |
Host |
In |
The coordinate of the process whose local array row or column is to be determined. |
isrcproc |
Host |
In |
The coordinate of the process that owns the first row or column of the distributed matrix. |
nprocs |
Host |
In |
The total number of row or column processes over which the matrix is distributed. |
`iproc` argument.
cusolverMpMatrixGatherD2H
#
cusolverStatus_t cusolverMpMatrixGatherD2H(
cusolverMpHandle_t handle,
int64_t M,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
int root,
void *h_dst,
int64_t h_lddst)
`A` on a buffer provided on process `root`. The input matrix `A` is originally distributed using 2D block cyclic format; on output, `h_dst` contains the matrix in column-major format.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
M |
Host |
In |
Number of rows of the global distributed matrix A. |
N |
Host |
In |
Number of columns of the global distributed matrix A. |
d_A |
Device |
In |
Number of columns of the global distributed matrix A. |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index in the global matrix A indicating the first row of sub(A). This function does not make any assumptions on the alignment of |
JA |
Host |
In |
Column index in the global matrix A indicating the first column of sub(A). This function does not make any assumptions on the alignment of |
descrA |
Host |
In |
Matrix descriptor of the global matrix A. |
root |
Host |
In |
Process ID on which the matrix A will be gathered. |
h_dst |
Host |
Out |
Destination host buffer on |
h_lddst |
Host |
In |
Leading dimension of the |
Warning
This function is meant as a utility function to verify the correctness of the data layouts; it is not intended to achieve high performance on large inputs.
cusolverMpMatrixScatterH2D
#
cusolverStatus_t cusolverMpMatrixScatterH2D(
cusolverMpHandle_t handle,
int64_t M,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
int root,
void *h_src,
int64_t h_ldsrc)
`h_src` from the `root` process to a distributed global matrix `A`. `h_src` is stored in column-major format. On output, `d_A` contains the local portions of the global matrix `A` distributed in 2D block cyclic format.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
M |
Host |
In |
Number of rows of the global distributed matrix A. |
N |
Host |
In |
Number of columns of the global distributed matrix A. |
d_A |
Device |
Out |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index in the global matrix A indicating the first row of sub(A). This function does not make any assumptions on the alignment of |
JA |
Host |
In |
Column index in the global matrix A indicating the first column of sub(A). This function does not make any assumptions on the alignment of |
descrA |
Host |
In |
Matrix descriptor of the global matrix A. |
root |
Host |
In |
Process ID from which the matrix A will be scattered. |
h_src |
Host |
In |
Source buffer on |
h_ldsrc |
Host |
In |
Leading dimension of the |
Warning
This function is meant as a utility function to verify the correctness of the data layouts; it is not intended to achieve high performance on large inputs.
Logging#
cusolverMpLoggerSetCallback
#
cusolverStatus_t cusolverMpLoggerSetCallback(
cusolverMpLoggerCallback_t callback)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
callback |
Host |
In |
Pointer to a callback function. See |
Warning
This is an experimental feature.
cusolverMpLoggerSetFile
#
cusolverStatus_t cusolverMpLoggerSetFile(
FILE *file)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
file |
Host |
In |
Pointer to an open file. File should have write permission |
Warning
This is an experimental feature.
cusolverMpLoggerOpenFile
#
cusolverStatus_t cusolverMpLoggerOpenFile(
const char* logFile)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
logFile |
Host |
In |
Path of the logging output file. |
Warning
This is an experimental feature.
cusolverMpLoggerSetLevel
#
cusolverStatus_t cusolverMpLoggerSetLevel(
int level)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
level |
Host |
In |
Value of the logging level. See |
Warning
This is an experimental feature.
cusolverMpLoggerSetMask
#
cusolverStatus_t cusolverMpLoggerSetMask(
int mask)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
mask |
Host |
In |
Value of the logging mask. See |
Warning
This is an experimental feature.
cusolverMpLoggerForceDisable
#
cusolverStatus_t cusolverMpLoggerForceDisable(
int level)
Warning
This is an experimental feature.
Dense Linear Algebra APIs#
cusolverMpGetrf
#
cusolverStatus_t cusolverMpGetrf(
cusolverMpHandle_t handle,
int64_t M,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
int64_t *d_ipiv,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
d_ipiv=NULL
.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
M |
Host |
In |
Number of rows of sub(A). |
N |
Host |
In |
Number of columns of sub(A). |
d_A |
Device |
In/Out |
Pointer to the first entry of the local portion of the global matrix A. On output, the sub(A) is overwritten with the L and U factors. |
IA |
Host |
In |
Row index of the first row of the sub(A). This function does not make any assumptions on the alignment of |
JA |
Host |
In |
Column index of the first column of the sub(A). This function does not make any assumptions on the alignment of |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A |
d_ipiv |
Device |
Out |
Local array of dimension |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpGetrf_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpGetrf_bufferSize() |
info |
Device |
Out |
|
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpGetrf_bufferSize
#
cusolverStatus_t cusolverMpGetrf_bufferSize(
cusolverMpHandle_t handle,
int64_t M,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
int64_t *d_ipiv,
cudaDataType_t computeType,
size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost)
d_ipiv=NULL
so cusolverMpGetrf() will compute the LU factorization of the input matrix A without pivoting.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
M |
Host |
In |
Number of rows of sub(A). |
N |
Host |
In |
Number of columns of sub(A). |
d_A |
Device |
In |
Pointer to the first entry of the local portion of the global matrix A. |
IA |
Host |
In |
Row index of the first row of the sub(A). This function does not make any assumptions on the alignment of |
JA |
Host |
In |
Column index of the first column of the sub(A). This function does not make any assumptions on the alignment of |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A |
d_ipiv |
Device |
In |
Indicates a pointer to a distributed integer array. When it is not |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
On output, contains the size in bytes of the local device workspace needed by cusolverMpGetrf(). |
workspaceInBytesOnHost |
Host |
Out |
On output, contains the size in bytes of the local host workspace needed by cusolverMpGetrf(). |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpGetrs
#
cusolverStatus_t cusolverMpGetrs(
cusolverMpHandle_t handle,
cublasOperation_t trans,
int64_t N,
int64_t NRHS,
const void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
const int64_t *d_ipiv,
void *d_B,
int64_t IB,
int64_t JB,
cusolverMpMatrixDescriptor_t descrB,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *d_info)
`trans`, which allows solving linear systems of the form:
trans |
Form of the linear system |
---|---|
CUBLAS_OP_N |
\(sub(A) \cdot X = sub(B)\) |
CUBLAS_OP_T |
\(sub(A)^T \cdot X = sub(B)\) |
CUBLAS_OP_C |
\(sub(A)^H \cdot X = sub(B)\) |
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
trans |
Host |
In |
Specifies the form of the linear system. Only |
N |
Host |
In |
Number of rows of sub(A). |
NRHS |
Host |
In |
Number of columns of sub(B). Currently, this routine only supports |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). This function does not make any assumptions on the alignment of |
JA |
Host |
In |
Column index of the first column of the sub(A). This function does not make any assumptions on the alignment of |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A |
d_ipiv |
Device |
In |
Local array of dimension |
d_B |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IB |
Host |
In |
Row index of the first row of the sub(B). This function does not make any assumptions on the alignment of |
JB |
Host |
In |
Column index of the first column of the sub(B). This function does not make any assumptions on the alignment of |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpGetrs_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpGetrs_bufferSize() |
info |
Device |
Out |
|
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpGetrs_bufferSize
#
cusolverStatus_t cusolverMpGetrs_bufferSize(
cusolverMpHandle_t handle,
cublasOperation_t trans,
int64_t N,
int64_t NRHS,
const void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
const int64_t *d_ipiv,
void *d_B,
int64_t IB,
int64_t JB,
cusolverMpMatrixDescriptor_t descrB,
cudaDataType_t computeType,
size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost)
d_ipiv=NULL
.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
trans |
Host |
In |
Specifies the form of the linear system. Only |
N |
Host |
In |
Number of rows of sub(A). |
NRHS |
Host |
In |
Number of columns of sub(B). Currently, this routine only supports |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). This function does not make any assumptions on the alignment of |
JA |
Host |
In |
Column index of the first column of the sub(A). This function does not make any assumptions on the alignment of |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A |
d_ipiv |
Device |
In |
Local array of dimension |
d_B |
Device |
In |
Pointer to the first entry of the local portion of the global matrix B. On output, B is overwritten with the solution of the linear system. |
IB |
Host |
In |
Row index of the first row of the sub(B). This function does not make any assumptions on the alignment of |
JB |
Host |
In |
Column index of the first column of the sub(B). This function does not make any assumptions on the alignment of |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
On output, contains the size in bytes of the local device workspace needed by cusolverMpGetrs(). |
workspaceInBytesOnHost |
Host |
Out |
On output, contains the size in bytes of the local host workspace needed by cusolverMpGetrs(). |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpPotrf
#
cusolverStatus_t cusolverMpPotrf(
cusolverMpHandle_t handle,
cublasFillMode_t uplo,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
`A(IA:IA+N-1, JA:JA+N-1)`.
If `uplo=CUBLAS_FILL_MODE_UPPER`, the factorization has the form
\[sub(A) = U^H \cdot U\]
If `uplo` is set to `CUBLAS_FILL_MODE_LOWER`, the factorization has the form
\[sub(A) = L \cdot L^H\]
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
uplo |
Host |
In |
Specifies if A is upper ( |
N |
Host |
In |
Number of rows and columns of sub(A). |
d_A |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). `JA` must be a multiple of the column blocking dimension |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpPotrf_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpPotrf_bufferSize() |
info |
Device |
Out |
|
(MB_A == NB_A)
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpPotrf_bufferSize
#
cusolverStatus_t cusolverMpPotrf_bufferSize(
cusolverMpHandle_t handle,
cublasFillMode_t uplo,
int64_t N,
const void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
cudaDataType_t computeType,
size_t* workspaceInBytesOnDevice,
size_t* workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
uplo |
Host |
In |
Specifies if A is upper ( |
N |
Host |
In |
Number of rows and columns of sub(A). |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). This function does not make any assumptions on the alignment of |
JA |
Host |
In |
Column index of the first column of the sub(A). This function does not make any assumptions on the alignment of |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
On output, contains the size in bytes of the local device workspace needed by cusolverMpPotrf(). |
workspaceInBytesOnHost |
Host |
Out |
On output, contains the size in bytes of the local host workspace needed by cusolverMpPotrf(). |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpPotrs
#
cusolverStatus_t cusolverMpPotrs(
cusolverMpHandle_t handle,
cublasFillMode_t uplo,
int64_t N,
int64_t NRHS,
const void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
void *d_B,
int64_t IB,
int64_t JB,
cusolverMpMatrixDescriptor_t descrB,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
A(IA:IA+N-1,JA:JA+N-1)
and is an N-by-N symmetric or Hermitian positive definite distributed matrix using the Cholesky factorization:\[sub(A) = U^H \cdot U\]
B(IB:IB+N-1,JB:JB+NRHS-1)
.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
uplo |
Host |
In |
Specifies if A is upper ( |
N |
Host |
In |
Number of rows and columns of sub(A). |
NRHS |
Host |
In |
Number of columns of sub(B). Currently, this routine only supports |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A |
d_B |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IB |
Host |
In |
Row index of the first row of the sub(B). This function does not make any assumptions on the alignment of |
JB |
Host |
In |
Column index of the first column of the sub(B). This function does not make any assumptions on the alignment of |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpPotrs_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpPotrs_bufferSize() |
info |
Device |
Out |
|
(MB_A == NB_A)
and alignment of sub(A) and sub(B) matrices, meaning (MB_A == MB_B)
and (IA == IB)
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
—
cusolverMpPotrs_bufferSize
#
cusolverStatus_t cusolverMpPotrs_bufferSize(
cusolverMpHandle_t handle,
cublasFillMode_t uplo,
int64_t N,
int64_t NRHS,
const void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
const void *d_B,
int64_t IB,
int64_t JB,
cusolverMpMatrixDescriptor_t descrB,
cudaDataType_t computeType,
size_t* workspaceInBytesOnDevice,
size_t* workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
uplo |
Host |
In |
Specifies if A is upper ( |
N |
Host |
In |
Number of rows and columns of sub(A). |
NRHS |
Host |
In |
Number of columns of sub(B). Currently, this routine only supports |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A |
d_B |
Device |
In |
Pointer into the local memory to an array of dimension |
IB |
Host |
In |
Row index of the first row of the sub(B). This function does not make any assumptions on the alignment of |
JB |
Host |
In |
Column index of the first column of the sub(B). This function does not make any assumptions on the alignment of |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
On output, contains the size in bytes of the local device workspace needed by cusolverMpPotrs(). |
workspaceInBytesOnHost |
Host |
Out |
On output, contains the size in bytes of the local host workspace needed by cusolverMpPotrs(). |
(MB_A == NB_A)
and alignment of sub(A) and sub(B) matrices, meaning (MB_A == MB_B)
and (IA == IB)
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
—
cusolverMpGeqrf
#
cusolverStatus_t cusolverMpGeqrf(
cusolverMpHandle_t handle,
int64_t M,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
void *d_tau,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
A(IA:IA+M-1, JA:JA+N-1)
.tau
and R is an upper triangular matrix.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
M |
Host |
In |
Number of rows of sub(A). |
N |
Host |
In |
Number of columns of sub(A). |
d_A |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_tau |
Device |
Out |
Pointer into the local memory to an array of dimension |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpGeqrf_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpGeqrf_bufferSize() |
info |
Device |
Out |
|
(MB_A == NB_A)
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpGeqrf_bufferSize
#
cusolverStatus_t cusolverMpGeqrf_bufferSize(
cusolverMpHandle_t handle,
int64_t M,
int64_t N,
const void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
cudaDataType_t computeType,
size_t* workspaceInBytesOnDevice,
size_t* workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
M |
Host |
In |
Number of rows of sub(A). |
N |
Host |
In |
Number of columns of sub(A). |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index in the global matrix A indicating the first row of sub(A). This function does not make any assumptions on the alignment of |
JA |
Host |
In |
Column index in the global matrix A indicating the first column of sub(A). This function does not make any assumptions on the alignment of |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
On output, contains the size in bytes of the local device workspace needed by cusolverMpGeqrf(). |
workspaceInBytesOnHost |
Host |
Out |
On output, contains the size in bytes of the local host workspace needed by cusolverMpGeqrf(). |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
—
cusolverMpOrmqr
#
cusolverStatus_t cusolverMpOrmqr(
cusolverMpHandle_t handle,
cublasSideMode_t side,
cublasOperation_t trans,
int64_t M,
int64_t N,
int64_t K,
const void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
const void *d_tau,
void *d_C,
int64_t IC,
int64_t JC,
const cusolverMpMatrixDescriptor_t descrC,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
C(IC:IC+M-1, JC:JC+N-1)
by the orthogonal matrix Q can be given from cusolverMpGeqrf().side
of CUBLAS_SIDE_LEFT
and CUBLAS_SIDE_RIGHT
respectively. Note that the current implementation only supports CUBLAS_SIDE_LEFT
.K <= M
and K <= N
for CUBLAS_SIDE_LEFT
and CUBLAS_SIDE_RIGHT
respectively.op
can be translated to \(Q\), \(Q^T\), \(Q^H\) based on the trans
argument CUBLAS_OP_N
, CUBLAS_OP_T
and CUBLAS_OP_C
.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
side |
Host |
In |
Indicate that Q is applied from left or right side. |
trans |
Host |
In |
Indicate that Q is applied with no-transpose or (conj)transpose. |
M |
Host |
In |
Number of rows of sub(C). |
N |
Host |
In |
Number of columns of sub(C). |
K |
Host |
In |
Number of Householder reflectors defining Q. |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). `JA` must be a multiple of the column blocking dimension |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_tau |
Device |
In |
Pointer into the local memory to an array of dimension |
d_C |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IC |
Host |
In |
Row index of the first row of the sub(C). |
JC |
Host |
In |
Column index of the first column of the sub(C). |
descrC |
Host |
In |
Matrix descriptor associated to the global matrix C. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpOrmqr_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpOrmqr_bufferSize() |
info |
Device |
Out |
|
(MB_A == NB_A)
and alignment of sub(A) and sub(C) matrices, meaning (MB_A == MB_C)
and (IA == IC)
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpOrmqr_bufferSize
#
cusolverStatus_t cusolverMpOrmqr_bufferSize(
cusolverMpHandle_t handle,
cublasSideMode_t side,
cublasOperation_t trans,
int64_t M,
int64_t N,
int64_t K,
const void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
const void *d_tau,
void *d_C,
int64_t IC,
int64_t JC,
const cusolverMpMatrixDescriptor_t descrC,
cudaDataType_t computeType,
size_t* workspaceInBytesOnDevice,
size_t* workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
side |
Host |
In |
Indicate that Q is applied from left or right side. |
trans |
Host |
In |
Indicate that Q is applied with no-transpose or (conj)transpose. |
M |
Host |
In |
Number of rows of sub(C). |
N |
Host |
In |
Number of columns of sub(C). |
K |
Host |
In |
Number of Householder reflectors defining Q. |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A).`JA` must be a multiple of the column blocking dimension |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_tau |
Device |
In |
Pointer into the local memory to an array of dimension |
d_C |
Device |
In |
Pointer into the local memory to an array of dimension |
IC |
Host |
In |
Row index of the first row of the sub(C). |
JC |
Host |
In |
Column index of the first column of the sub(C). |
descrC |
Host |
In |
Matrix descriptor associated to the global matrix C. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
The size in bytes of the local device workspace needed by cusolverMpOrmqr(). |
workspaceInBytesOnHost |
Host |
Out |
The size in bytes of the local host workspace needed by cusolverMpOrmqr() |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpGels
#
cusolverStatus_t cusolverMpGels(
cusolverMpHandle_t handle,
cublasOperation_t trans,
int64_t M,
int64_t N,
int64_t NRHS,
void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
void *d_B,
int64_t IB,
int64_t JB,
const cusolverMpMatrixDescriptor_t descrB,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
A(IA:IA+M-1, JA:JA+N-1)
or its transpose, using QR or LQ factorization of sub(A).M >= N
) with a no-transpose option is only supported via QR factorization cusolverMpGeqrf().B(IB:IB+M-1, JB:JB+NRHS-1)
and the solution multi-vector X
is overwritten on the sub(B).Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
trans |
Host |
In |
Indicates whether the linear system involves sub(A) with no transpose or its (conjugate) transpose. |
M |
Host |
In |
Number of rows of sub(A). |
N |
Host |
In |
Number of columns of sub(A). |
NRHS |
Host |
In |
Number of right hand side vectors i.e., number of columns of sub(B) and X. |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_B |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IB |
Host |
In |
Row index of the first row of the sub(B). |
JB |
Host |
In |
Column index of the first column of the sub(B). |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpGels_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpGels_bufferSize() |
info |
Device |
Out |
|
(MB_A == NB_A)
and alignment of sub(A) and sub(B) matrices, meaning (MB_A == MB_B)
and (IA == IB)
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpGels_bufferSize
#
cusolverStatus_t cusolverMpGels_bufferSize(
cusolverMpHandle_t handle,
cublasOperation_t trans,
int64_t M,
int64_t N,
int64_t NRHS,
void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
void *d_B,
int64_t IB,
int64_t JB,
const cusolverMpMatrixDescriptor_t descrB,
cudaDataType_t computeType,
size_t* workspaceInBytesOnDevice,
size_t* workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
trans |
Host |
In |
Indicates whether the linear system involves sub(A) with no transpose or its (conjugate) transpose. |
M |
Host |
In |
Number of rows of sub(A). |
N |
Host |
In |
Number of columns of sub(A). |
NRHS |
Host |
In |
Number of right hand side vectors i.e., number of columns of sub(B) and X. |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_B |
Device |
In |
Pointer into the local memory to an array of dimension |
IB |
Host |
In |
Row index of the first row of the sub(B). |
JB |
Host |
In |
Column index of the first column of the sub(B). |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
The size in bytes of the local device workspace needed by cusolverMpGels(). |
workspaceInBytesOnHost |
Host |
Out |
The size in bytes of the local host workspace needed by cusolverMpGels() |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpSytrd
#
cusolverStatus_t cusolverMpSytrd(
cusolverMpHandle_t handle,
cublasFillMode_t uplo,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
void *d_d,
void *d_e,
void *d_tau,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
A(IA:IA+N-1, JA:JA+N-1)
to a tridiagonal form.CUBLAS_FILL_MODE_LOWER
only.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
uplo |
Host |
In |
Indicate that the function uses either upper or lower triangular part of sub(A). |
N |
Host |
In |
Number of rows/columns of square matrix sub(A). |
d_A |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_d |
Device |
Out |
Pointer into the local memory to an array of dimension |
d_e |
Device |
Out |
Pointer into the local memory to an array of dimension |
d_tau |
Device |
Out |
Pointer into the local memory to an array of dimension |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpSytrd_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpSytrd_bufferSize(). |
info |
Device |
Out |
|
(MB_A == NB_A)
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpSytrd_bufferSize
#
cusolverStatus_t cusolverMpSytrd_bufferSize(
cusolverMpHandle_t handle,
cublasFillMode_t uplo,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
void *d_d,
void *d_e,
void *d_tau,
cudaDataType_t computeType,
size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
uplo |
Host |
In |
Indicate that the function uses either upper or lower triangular part of sub(A). |
N |
Host |
In |
Number of rows/columns of square matrix sub(A). |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_d |
Device |
In |
Pointer into the local memory to an array of dimension |
d_e |
Device |
In |
Pointer into the local memory to an array of dimension |
d_tau |
Device |
In |
Pointer into the local memory to an array of dimension |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
The size in bytes of the local device workspace needed by cusolverMpSytrd(). |
workspaceInBytesOnHost |
Host |
Out |
The size in bytes of the local host workspace needed by cusolverMpSytrd() |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpStedc
#
cusolverStatus_t cusolverMpStedc(
cusolverMpHandle_t handle,
char *compz,
int64_t N,
void *d_d,
void *d_e,
void *d_Q,
int64_t IQ,
int64_t JQ,
const cusolverMpMatrixDescriptor_t descrQ,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
compz |
Host |
In |
Option to compute eigenvalues only( |
N |
Host |
In |
Number of rows/columns of square matrix sub(Q). |
d_d |
Device |
In/Out |
Pointer to an array of dimension |
d_e |
Device |
In/Out |
Pointer to an array of dimension |
d_Q |
Device |
Out |
Pointer into the local memory to an array of dimension |
IQ |
Host |
In |
Row index of the first row of the sub(Q). |
JQ |
Host |
In |
Column index of the first column of the sub(Q). |
descrQ |
Host |
In |
Matrix descriptor associated to the global matrix Q. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpStedc_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpStedc_bufferSize() |
info |
Device |
Out |
|
(MB_Q == NB_Q)
.Data Type of Tridiagonal Matrix |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpStedc_bufferSize
#
cusolverStatus_t cusolverMpStedc_bufferSize(
cusolverMpHandle_t handle,
char *compz,
int64_t N,
void *d_d,
void *d_e,
void *d_Q,
int64_t IQ,
int64_t JQ,
const cusolverMpMatrixDescriptor_t descrQ,
cudaDataType_t computeType,
size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost,
int *iwork)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
compz |
Host |
In |
Option to compute eigenvalues only( |
N |
Host |
In |
Number of rows/columns of square matrix sub(Q). |
d_d |
Device |
In |
Pointer to an array of dimension |
d_e |
Device |
In |
Pointer to an array of dimension |
d_Q |
Device |
In |
Pointer into the local memory to an array of dimension |
IQ |
Host |
In |
Row index of the first row of the sub(Q). |
JQ |
Host |
In |
Column index of the first column of the sub(Q). |
descrQ |
Host |
In |
Matrix descriptor associated to the global matrix Q. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
The size in bytes of the local device workspace needed by the routine cusolverMpStedc(). |
workspaceInBytesOnHost |
Host |
Out |
The size in bytes of the local host workspace needed by cusolverMpStedc() |
info |
Device |
Out |
|
Data Type of Tridiagonal Matrix |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpOrmtr
#
cusolverStatus_t cusolverMpOrmtr(
cusolverMpHandle_t handle,
cublasSideMode_t side,
cublasFillMode_t uplo,
cublasOperation_t trans,
int64_t M,
int64_t N,
const void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
const void *d_tau,
void *d_C,
int64_t IC,
int64_t JC,
const cusolverMpMatrixDescriptor_t descrC,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
C(IC:IC+M-1, JC:JC+N-1)
by the orthogonal matrix Q can be given from cusolverMpSytrd().CUBLAS_SIDE_LEFT
and CUBLAS_SIDE_RIGHT
.op
can be translated to \(Q\), \(Q^T\), \(Q^H\) based on the trans
argument CUBLAS_OP_N
, CUBLAS_OP_T
and CUBLAS_OP_H
.CUBLAS_FILL_MODE_UPPER
and CUBLAS_FILL_MODE_LOWER
nq
is either m
or n
according to the side parameter of CUBLAS_SIDE_LEFT
or CUBLAS_SIDE_RIGHT
respectively.Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
side |
Host |
In |
Indicate that Q is applied from left or right side. |
uplo |
Host |
In |
Indicates whether the upper or lower triangular part of sub(A) contains the Householder reflectors. |
trans |
Host |
In |
Indicate that Q is applied with no-transpose or (conj)transpose. |
M |
Host |
In |
Number of rows of sub(C). |
N |
Host |
In |
Number of columns of sub(C). |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A).`JA` must be a multiple of the column blocking dimension |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_tau |
Device |
In |
Pointer into the local memory to an array of dimension |
d_C |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IC |
Host |
In |
Row index of the first row of the sub(C). |
JC |
Host |
In |
Column index of the first column of the sub(C). |
descrC |
Host |
In |
Matrix descriptor associated to the global matrix C. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpOrmtr_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpOrmtr_bufferSize() |
info |
Device |
Out |
|
(MB_A == NB_A)
and alignment of sub(A) and sub(C) matrices, meaning (MB_A == MB_C)
and (IA == IC)
.Data Type of A and C |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpOrmtr_bufferSize
#
cusolverStatus_t cusolverMpOrmtr_bufferSize(
cusolverMpHandle_t handle,
cublasSideMode_t side,
cublasFillMode_t uplo,
cublasOperation_t trans,
int64_t M,
int64_t N,
const void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
const void *d_tau,
void *d_C,
int64_t IC,
int64_t JC,
const cusolverMpMatrixDescriptor_t descrC,
cudaDataType_t computeType,
size_t* workspaceInBytesOnDevice,
size_t* workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
side |
Host |
In |
Indicate that Q is applied from left or right side. |
uplo |
Host |
In |
Indicates whether the upper or lower triangular part of sub(A) contains the Householder reflectors. |
trans |
Host |
In |
Indicate that Q is applied with no-transpose or (conj)transpose. |
M |
Host |
In |
Number of rows of sub(C). |
N |
Host |
In |
Number of columns of sub(C). |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A).`JA` must be a multiple of the column blocking dimension |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_tau |
Device |
In |
Pointer into the local memory to an array of dimension |
d_C |
Device |
In |
Pointer into the local memory to an array of dimension |
IC |
Host |
In |
Row index of the first row of the sub(C). |
JC |
Host |
In |
Column index of the first column of the sub(C).`JC` must be a multiple of the column blocking dimension |
descrC |
Host |
In |
Matrix descriptor associated to the global matrix C. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
The size in bytes of the local device workspace needed by cusolverMpOrmtr(). |
workspaceInBytesOnHost |
Host |
Out |
The size in bytes of the local host workspace needed by cusolverMpOrmtr() |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpSyevd
#
cusolverStatus_t cusolverMpSyevd(
cusolverMpHandle_t handle,
char *jobz,
cublasFillMode_t uplo,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
void *d_d,
void *d_Z,
int64_t IZ,
int64_t JZ,
const cusolverMpMatrixDescriptor_t descrZ,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
A(IA:IA+N-1, JA:JA+N-1)
using the divide and conquer algorithm cusolverMpStedc(). Note that the current implementation of cusolverMpStedc() may fail when the block size is not a power of two.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
jobz |
Host |
In |
If |
uplo |
Host |
In |
Indicates whether the upper or lower triangular part of sub(A) is used to compute the eigen solutions. |
N |
Host |
In |
Number of rows and columns of sub(A). |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A).`JA` must be a multiple of the column blocking dimension |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_d |
Device |
Out |
Pointer into the memory to an array of global size |
d_Z |
Device |
Out |
Pointer into the local memory to an array of dimension |
IZ |
Host |
In |
Row index of the first row of the sub(Z). |
JZ |
Host |
In |
Column index of the first column of the sub(Z). |
descrZ |
Host |
In |
Matrix descriptor associated to the global matrix Z. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by the routine as provided by cusolverMpSyevd_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by the routine as provided by cusolverMpSyevd_bufferSize() |
info |
Device |
Out |
|
(MB_A == NB_A)
and alignment of sub(A) and sub(Z) matrices, meaning (MB_A == MB_Z)
and (IA == IZ)
.Data Type of A and Z |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpSyevd_bufferSize
#
cusolverStatus_t cusolverMpSyevd_bufferSize(
cusolverMpHandle_t handle,
char *jobz,
cublasFillMode_t uplo,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
const cusolverMpMatrixDescriptor_t descrA,
void *d_d,
void *d_Z,
int64_t IZ,
int64_t JZ,
const cusolverMpMatrixDescriptor_t descrZ,
cudaDataType_t computeType,
size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
jobz |
Host |
In |
If |
uplo |
Host |
In |
Indicates whether the upper or lower triangular part of sub(A) is used to compute the eigen solutions. |
N |
Host |
In |
Number of rows and columns of sub(A). |
d_A |
Device |
In |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A).`JA` must be a multiple of the column blocking dimension |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_d |
Device |
In |
Pointer into the memory to an array of global size |
d_Z |
Device |
In |
Pointer into the local memory to an array of dimension |
IZ |
Host |
In |
Row index of the first row of the sub(Z). |
JZ |
Host |
In |
Column index of the first column of the sub(Z). |
descrZ |
Host |
In |
Matrix descriptor associated to the global matrix Z. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
The size in bytes of the local device workspace needed by cusolverMpSyevd(). |
workspaceInBytesOnHost |
Host |
Out |
The size in bytes of the local host workspace needed by cusolverMpSyevd() |
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpSygst
#
cusolverStatus_t cusolverMpSygst(
cusolverMpHandle_t handle,
cusolverEigType_t ibtype,
cublasFillMode_t uplo,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
const void *d_B,
int64_t IB,
int64_t JB,
cusolverMpMatrixDescriptor_t descrB,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
ibtype = CUSOLVER_EIG_TYPE_1
: the problem is sub(A)*x = lambda*sub(B)*x, and sub(A) is overwritten by inv(L)*sub(A)*inv(L^H) or inv(U^H)*sub(A)*inv(U).
ibtype = CUSOLVER_EIG_TYPE_2 or 3
: the problem is sub(A)*sub(B)*x = lambda*x or sub(B)*sub(A)*x = lambda*x, and sub(A) is overwritten by L^H*sub(A)*L or U*sub(A)*U^H.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
ibtype |
Host |
In |
Indicate the eigen problem type sub(A)*x=(lambda)*sub(B)*x, sub(A)*sub(B)*x=(lambda)*x, or sub(B)*sub(A)*x=(lambda)*x. |
uplo |
Host |
In |
Indicate that lower |
N |
Host |
In |
Number of rows and columns of sub(A) and sub(B). |
d_A |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_B |
Device |
In |
Pointer into the local memory to an array of dimension |
IB |
Host |
In |
Row index of the first row of the sub(B). |
JB |
Host |
In |
Column index of the first column of the sub(B). |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace needed by cusolverMpSygst(). |
h_work |
Host |
Out |
Host workspace of size |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace needed by cusolverMpSygst() |
info |
Device |
Out |
|
Same square blocksize is used
(MB == NB)
for the matrices A and B. The beginning row and column of A and B are aligned with each other, i.e.,
(IA == IB)
and (JA == JB)
.
ibtype = CUSOLVER_EIG_TYPE_1
, uplo = CUBLAS_FILL_MODE_LOWER
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpSygst_bufferSize
#
cusolverStatus_t cusolverMpSygst_bufferSize(
cusolverMpHandle_t handle,
cusolverEigType_t ibtype,
cublasFillMode_t uplo,
int64_t N,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
int64_t IB,
int64_t JB,
cusolverMpMatrixDescriptor_t descrB,
cudaDataType_t computeType,
size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
ibtype |
Host |
In |
Indicate the eigen problem type sub(A)*x=(lambda)*sub(B)*x, sub(A)*sub(B)*x=(lambda)*x, or sub(B)*sub(A)*x=(lambda)*x. |
uplo |
Host |
In |
Indicate that lower |
N |
Host |
In |
Number of rows and columns of sub(A) and sub(B). |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
IB |
Host |
In |
Row index of the first row of the sub(B). |
JB |
Host |
In |
Column index of the first column of the sub(B). |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
The size in bytes of the local device workspace needed by cusolverMpSygst(). |
workspaceInBytesOnHost |
Host |
Out |
The size in bytes of the local host workspace needed by cusolverMpSygst() |
Same square blocksize is used
(MB == NB)
for the matrices A and B. The beginning row and column of A and B are aligned with each other, i.e.,
(IA == IB)
and (JA == JB)
.
ibtype = CUSOLVER_EIG_TYPE_1
, uplo = CUBLAS_FILL_MODE_LOWER
.Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpSygvd
#
cusolverStatus_t cusolverMpSygvd(
cusolverMpHandle_t handle,
cusolverEigType_t ibtype,
cusolverEigMode_t jobz,
cublasFillMode_t uplo,
int64_t N,
void *d_A,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
void *d_B,
int64_t IB,
int64_t JB,
cusolverMpMatrixDescriptor_t descrB,
void *d_d,
void *d_Z,
int64_t IZ,
int64_t JZ,
cusolverMpMatrixDescriptor_t descrZ,
cudaDataType_t computeType,
void *d_work,
size_t workspaceInBytesOnDevice,
void *h_work,
size_t workspaceInBytesOnHost,
int *info)
ibtype = CUSOLVER_EIG_TYPE_1
: the problem is sub(A)*x = lambda*sub(B)*x.
ibtype = CUSOLVER_EIG_TYPE_2
: the problem is sub(A)*sub(B)*x = lambda*x.
ibtype = CUSOLVER_EIG_TYPE_3
: the problem is sub(B)*sub(A)*x = lambda*x.
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
ibtype |
Host |
In |
Indicates the generalized eigenproblem type: sub(A)*x=(lambda)*sub(B)*x, sub(A)*sub(B)*x=(lambda)*x, or sub(B)*sub(A)*x=(lambda)*x. |
jobz |
Host |
In |
Indicates whether the routine computes eigenvalues only (CUSOLVER_EIG_MODE_NOVECTOR) or both eigenvalues and eigenvectors (CUSOLVER_EIG_MODE_VECTOR). |
uplo |
Host |
In |
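Indicates whether the lower (CUBLAS_FILL_MODE_LOWER) or upper (CUBLAS_FILL_MODE_UPPER) triangular part of sub(A) and sub(B) is referenced. |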
N |
Host |
In |
Number of rows and columns of sub(A) and sub(B). |
d_A |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
d_B |
Device |
In/Out |
Pointer into the local memory to an array of dimension |
IB |
Host |
In |
Row index of the first row of the sub(B). |
JB |
Host |
In |
Column index of the first column of the sub(B). |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
d_d |
Device |
Out |
Pointer into the memory to an array of global size N that receives the eigenvalues. |
d_Z |
Device |
Out |
Pointer into the local memory to an array of dimension |
IZ |
Host |
In |
Row index of the first row of the sub(Z). |
JZ |
Host |
In |
Column index of the first column of the sub(Z). |
descrZ |
Host |
In |
Matrix descriptor associated to the global matrix Z. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
d_work |
Device |
Out |
Device workspace of size workspaceInBytesOnDevice. |
workspaceInBytesOnDevice |
Host |
In |
The size in bytes of the local device workspace d_work, as returned by cusolverMpSygvd_bufferSize(). |
h_work |
Host |
Out |
Host workspace of size workspaceInBytesOnHost. |
workspaceInBytesOnHost |
Host |
In |
The size in bytes of the local host workspace h_work, as returned by cusolverMpSygvd_bufferSize(). |
info |
Device |
Out |
|
The same square block size is used
(MB == NB)
for the matrices A, B, and Z. The beginning row and column of A, B, and Z are aligned with each other, i.e.,
(IA == IB == IZ)
and (JA == JB == JZ)
.
ibtype = CUSOLVER_EIG_TYPE_1
, jobz = CUSOLVER_EIG_MODE_VECTOR
, uplo = CUBLAS_FILL_MODE_LOWER
.
Data Type of A, B and Z |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |
cusolverMpSygvd_bufferSize
#
cusolverStatus_t cusolverMpSygvd_bufferSize(
cusolverMpHandle_t handle,
cusolverEigType_t ibtype,
cusolverEigMode_t jobz,
cublasFillMode_t uplo,
int64_t N,
int64_t IA,
int64_t JA,
cusolverMpMatrixDescriptor_t descrA,
int64_t IB,
int64_t JB,
cusolverMpMatrixDescriptor_t descrB,
int64_t IZ,
int64_t JZ,
cusolverMpMatrixDescriptor_t descrZ,
cudaDataType_t computeType,
size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost)
Parameter |
Memory |
In/Out |
Description |
---|---|---|---|
handle |
Host |
In |
cuSOLVERMp library handle. |
ibtype |
Host |
In |
Indicates the generalized eigenproblem type: sub(A)*x=(lambda)*sub(B)*x, sub(A)*sub(B)*x=(lambda)*x, or sub(B)*sub(A)*x=(lambda)*x. |
jobz |
Host |
In |
Indicates whether the routine computes eigenvalues only (CUSOLVER_EIG_MODE_NOVECTOR) or both eigenvalues and eigenvectors (CUSOLVER_EIG_MODE_VECTOR). |
uplo |
Host |
In |
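Indicates whether the lower (CUBLAS_FILL_MODE_LOWER) or upper (CUBLAS_FILL_MODE_UPPER) triangular part of sub(A) and sub(B) is referenced. |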
N |
Host |
In |
Number of rows and columns of sub(A) and sub(B). |
IA |
Host |
In |
Row index of the first row of the sub(A). |
JA |
Host |
In |
Column index of the first column of the sub(A). |
descrA |
Host |
In |
Matrix descriptor associated to the global matrix A. |
IB |
Host |
In |
Row index of the first row of the sub(B). |
JB |
Host |
In |
Column index of the first column of the sub(B). |
descrB |
Host |
In |
Matrix descriptor associated to the global matrix B. |
IZ |
Host |
In |
Row index of the first row of the sub(Z). |
JZ |
Host |
In |
Column index of the first column of the sub(Z). |
descrZ |
Host |
In |
Matrix descriptor associated to the global matrix Z. |
computeType |
Host |
In |
Data type used for computations. See table below for supported combinations. |
workspaceInBytesOnDevice |
Host |
Out |
The size in bytes of the local device workspace needed by cusolverMpSygvd(). |
workspaceInBytesOnHost |
Host |
Out |
The size in bytes of the local host workspace needed by cusolverMpSygvd(). |
The same square block size is used
(MB == NB)
for the matrices A, B, and Z. The beginning row and column of A, B, and Z are aligned with each other, i.e.,
(IA == IB == IZ)
and (JA == JB == JZ)
.
ibtype = CUSOLVER_EIG_TYPE_1
, jobz = CUSOLVER_EIG_MODE_VECTOR
, uplo = CUBLAS_FILL_MODE_LOWER
.
Data Type of A |
computeType |
Output Data Type |
---|---|---|
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_32F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_R_64F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_32F |
CUDA_C_64F |
CUDA_C_64F |
CUDA_C_64F |