/*
 * CUDSS_CALL_AND_CHECK(call, status, msg)
 *
 * Evaluates `call` once, stores the result in `status`, and prints a
 * diagnostic to stdout when the result differs from CUDSS_STATUS_SUCCESS.
 * The macro only reports the failure: it does not return, abort, or release
 * anything, so the caller must inspect `status` afterwards.
 *
 *   call   - expression yielding a cuDSS status value
 *   status - assignable lvalue that receives the result of `call`
 *   msg    - text appended to the diagnostic; it is stringified with `#`, so
 *            a string-literal argument is printed including its quotes
 *
 * The definition deliberately does NOT end with a semicolon; the caller
 * supplies it. This keeps `if (c) CUDSS_CALL_AND_CHECK(...); else ...` valid.
 */
#define CUDSS_CALL_AND_CHECK(call, status, msg)                                \
    do {                                                                       \
        (status) = (call);                                                     \
        if ((status) != CUDSS_STATUS_SUCCESS) {                                \
            printf("CUDSS call ended unsuccessfully with status = %d, "        \
                   "details: " #msg "\n",                                      \
                   (int)(status));                                             \
        }                                                                      \
    } while (0)
/*
 * CUDSS_CALL_AND_CHECK_TIME(call, status, msg, func_name, WARM_UP, PERF_RUN)
 *
 * Measures the average host-side wall time of `call` and prints it.
 *
 *   call      - expression yielding a cuDSS status value; it is evaluated
 *               once for the optional warm-up and once per timed iteration
 *   status    - assignable lvalue that receives the result of every call
 *   msg       - text appended to the failure diagnostic (stringified with
 *               `#`, so a string literal is printed including its quotes)
 *   func_name - C string used as the label of the timing line
 *   WARM_UP   - when non-zero, one untimed call is made first
 *   PERF_RUN  - when non-zero, `nrun` timed iterations are made; otherwise 1
 *
 * Names taken from the expansion scope (they are not macro parameters and
 * must be visible where the macro is used): `start_time` (assigned the value
 * of second()), `nrun`, `rank`, and the function second().
 *
 * The device is synchronized immediately before and after the timed loop so
 * that the host timer brackets the timed calls. A failing call or a failing
 * synchronization is reported on stdout but does not stop the measurement.
 * Only the expansion where `rank == 0` prints the timing line.
 *
 * NOTE: with PERF_RUN set and `nrun <= 0`, no timed call is made and the
 * reported average is a division by a non-positive count.
 *
 * The definition deliberately does NOT end with a semicolon; the caller
 * supplies it.
 */
#define CUDSS_CALL_AND_CHECK_TIME(call, status, msg, func_name, WARM_UP,       \
                                  PERF_RUN)                                    \
    do {                                                                       \
        if (WARM_UP) {                                                         \
            (status) = (call);                                                 \
            if ((status) != CUDSS_STATUS_SUCCESS) {                            \
                printf("CUDSS call ended unsuccessfully with status = %d, "    \
                       "details: " #msg "\n",                                  \
                       (int)(status));                                         \
            }                                                                  \
        }                                                                      \
        const int cudss_timed_iters_ = (PERF_RUN) ? (nrun) : 1;                \
        cudaError_t cudss_sync_err_ = cudaDeviceSynchronize();                 \
        if (cudss_sync_err_ != cudaSuccess) {                                  \
            printf("cudaDeviceSynchronize failed before timing %s: %s\n",      \
                   (func_name), cudaGetErrorString(cudss_sync_err_));          \
        }                                                                      \
        start_time = second();                                                 \
        for (int cudss_iter_ = 0; cudss_iter_ < cudss_timed_iters_;            \
             cudss_iter_++) {                                                  \
            (status) = (call);                                                 \
            if ((status) != CUDSS_STATUS_SUCCESS) {                            \
                printf("CUDSS call ended unsuccessfully with status = %d, "    \
                       "details: " #msg "\n",                                  \
                       (int)(status));                                         \
            }                                                                  \
        }                                                                      \
        cudss_sync_err_ = cudaDeviceSynchronize();                             \
        double cudss_elapsed_ =                                                \
            (second() - start_time) / cudss_timed_iters_;                      \
        if (cudss_sync_err_ != cudaSuccess) {                                  \
            printf("cudaDeviceSynchronize failed after timing %s: %s\n",       \
                   (func_name), cudaGetErrorString(cudss_sync_err_));          \
        }                                                                      \
        if (rank == 0) {                                                       \
            printf("%s: time = %1.8f\n", (func_name), cudss_elapsed_);         \
            fflush(NULL);                                                      \
        }                                                                      \
    } while (0)
// Crude host-side wall-clock timer, intended only as an example.
// Returns the current time reported by gettimeofday() as seconds, with the
// microsecond field folded in as the fractional part. Callers measure an
// interval by subtracting two readings.
static double second(void) {
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole_seconds = (double)now.tv_sec;
    const double fractional_seconds = (double)now.tv_usec / 1000000.0;
    return whole_seconds + fractional_seconds;
}
// ...
// Receives the result of every cuDSS call made through the timing macro.
cudssStatus_t status = CUDSS_STATUS_SUCCESS;

// Timing parameters. They are declared before the first timed call because
// CUDSS_CALL_AND_CHECK_TIME expands to code that names `nrun` even when its
// PERF_RUN argument is 0.
int nrun = 10;  // number of timed iterations; small matrices benefit from a
                // larger value (say 1000), for big ones 1 is often enough
int warmup = 1; // at least one warm-up iteration is recommended for
                // factorization and solve

// Analysis: timed as a single call, without warm-up and without repetition
// (WARM_UP = 0, PERF_RUN = 0).
CUDSS_CALL_AND_CHECK_TIME(
    cudssExecute(handle, CUDSS_PHASE_ANALYSIS, solverConfig, solverData, A, sol, rhs),
    status, "cudssExecute for analysis", "ANALYSIS", 0, 0);

// Factorization: optional warm-up call, then averaged over `nrun` iterations.
CUDSS_CALL_AND_CHECK_TIME(
    cudssExecute(handle, CUDSS_PHASE_FACTORIZATION, solverConfig, solverData, A, sol, rhs),
    status, "cudssExecute for factor", "FACTOR", warmup, 1);

// Note: depending on the application, it may also make sense to measure the
// performance of other phases, e.g. CUDSS_PHASE_REFACTORIZATION or the solve
// sub-phases.

// Solve: optional warm-up call, then averaged over `nrun` iterations.
CUDSS_CALL_AND_CHECK_TIME(
    cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, sol, rhs),
    status, "cudssExecute for solve", "SOLVE", warmup, 1);