1/* { dg-do run { target openacc_nvidia_accel_selected } } */ 2/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ 3 4#include <stdio.h> 5#include <stdlib.h> 6#include <cuda.h> 7#include <cuda_runtime_api.h> 8#include <cublas_v2.h> 9#include <openacc.h> 10 11void 12saxpy (int n, float a, float *x, float *y) 13{ 14 int i; 15 16 for (i = 0; i < n; i++) 17 { 18 y[i] = a * x[i] + y[i]; 19 } 20} 21 22void 23context_check (CUcontext ctx1) 24{ 25 CUcontext ctx2, ctx3; 26 CUresult r; 27 28 r = cuCtxGetCurrent (&ctx2); 29 if (r != CUDA_SUCCESS) 30 { 31 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); 32 exit (EXIT_FAILURE); 33 } 34 35 if (ctx1 != ctx2) 36 { 37 fprintf (stderr, "new context established\n"); 38 exit (EXIT_FAILURE); 39 } 40 41 ctx3 = (CUcontext) acc_get_current_cuda_context (); 42 43 if (ctx1 != ctx3) 44 { 45 fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); 46 exit (EXIT_FAILURE); 47 } 48 49 return; 50} 51 52int 53main (int argc, char **argv) 54{ 55 cublasStatus_t s; 56 cudaError_t e; 57 cublasHandle_t h; 58 CUcontext pctx, ctx; 59 CUresult r; 60 int dev; 61 int i; 62 const int N = 256; 63 float *h_X, *h_Y1, *h_Y2; 64 float *d_X,*d_Y; 65 float alpha = 2.0f; 66 float error_norm; 67 float ref_norm; 68 69 /* Test 1 - cuBLAS creates, OpenACC shares. */ 70 71 s = cublasCreate (&h); 72 if (s != CUBLAS_STATUS_SUCCESS) 73 { 74 fprintf (stderr, "cublasCreate failed: %d\n", s); 75 exit (EXIT_FAILURE); 76 } 77 78 r = cuCtxGetCurrent (&pctx); 79 if (r != CUDA_SUCCESS) 80 { 81 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); 82 exit (EXIT_FAILURE); 83 } 84 85 e = cudaGetDevice (&dev); 86 if (e != cudaSuccess) 87 { 88 fprintf (stderr, "cudaGetDevice failed: %d\n", e); 89 exit (EXIT_FAILURE); 90 } 91 92 acc_set_device_num (dev, acc_device_nvidia); 93 94 h_X = (float *) malloc (N * sizeof (float)); 95 if (!h_X) 96 { 97 fprintf (stderr, "malloc failed: for h_X\n"); 98 exit (EXIT_FAILURE); 99 } 100 101 h_Y1 = (float *) malloc (N * sizeof (float)); 102 if (!h_Y1) 103 { 104 fprintf (stderr, "malloc failed: for h_Y1\n"); 105 exit (EXIT_FAILURE); 106 } 107 108 h_Y2 = (float *) malloc (N * sizeof (float)); 109 if (!h_Y2) 110 { 111 fprintf (stderr, "malloc failed: for h_Y2\n"); 112 exit (EXIT_FAILURE); 113 } 114 115 for (i = 0; i < N; i++) 116 { 117 h_X[i] = rand () / (float) RAND_MAX; 118 h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; 119 } 120 121 d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); 122 if (d_X == NULL) 123 { 124 fprintf (stderr, "copyin error h_X\n"); 125 exit (EXIT_FAILURE); 126 } 127 128 context_check (pctx); 129 130 d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); 131 if (d_Y == NULL) 132 { 133 fprintf (stderr, "copyin error h_Y1\n"); 134 exit (EXIT_FAILURE); 135 } 136 137 context_check (pctx); 138 139 s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); 140 if (s != CUBLAS_STATUS_SUCCESS) 141 { 142 fprintf (stderr, "cublasSaxpy failed: %d\n", s); 143 exit (EXIT_FAILURE); 144 } 145 146 context_check (pctx); 147 148 acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); 149 150 context_check (pctx); 151 152 saxpy (N, alpha, h_X, h_Y2); 153 154 error_norm = 0; 155 ref_norm = 0; 156 157 for (i = 0; i < N; ++i) 158 { 159 float diff; 160 161 diff = h_Y1[i] - h_Y2[i]; 162 error_norm += diff * diff; 163 ref_norm += h_Y2[i] * h_Y2[i]; 164 } 165 166 error_norm = (float) sqrt ((double) error_norm); 167 ref_norm = (float) sqrt ((double) ref_norm); 168 169 if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) 170 { 171 fprintf (stderr, "math error\n"); 172 exit (EXIT_FAILURE); 173 } 174 175 free (h_X); 176 free (h_Y1); 177 free (h_Y2); 178 179 acc_free (d_X); 180 acc_free (d_Y); 181 182 context_check (pctx); 183 184 s = cublasDestroy (h); 185 if (s != CUBLAS_STATUS_SUCCESS) 186 { 187 fprintf (stderr, "cublasDestroy failed: %d\n", s); 188 exit (EXIT_FAILURE); 189 } 190 191 acc_shutdown (acc_device_nvidia); 192 193 r = cuCtxGetCurrent (&ctx); 194 if (r != CUDA_SUCCESS) 195 { 196 fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); 197 exit (EXIT_FAILURE); 198 } 199 200 if (!ctx) 201 { 202 fprintf (stderr, "Expected context\n"); 203 exit (EXIT_FAILURE); 204 } 205 206 if (pctx != ctx) 207 { 208 fprintf (stderr, "Unexpected new context\n"); 209 exit (EXIT_FAILURE); 210 } 211 212 return EXIT_SUCCESS; 213} 214