1/* { dg-do run { target openacc_nvidia_accel_selected } } */
2/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
3
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
#include <openacc.h>
10
/* Host-side reference SAXPY: for each of the first N elements, replace
   the element of Y with A times the matching element of X plus itself.
   Elements are processed in ascending order; nothing is done when N is
   zero or negative.  */
void
saxpy (int n, float a, float *x, float *y)
{
    int remaining;

    for (remaining = n; remaining > 0; remaining--, x++, y++)
    {
        *y = a * *x + *y;
    }
}
21
22void
23context_check (CUcontext ctx1)
24{
25    CUcontext ctx2, ctx3;
26    CUresult r;
27
28    r = cuCtxGetCurrent (&ctx2);
29    if (r != CUDA_SUCCESS)
30    {
31        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
32        exit (EXIT_FAILURE);
33    }
34
35    if (ctx1 != ctx2)
36    {
37        fprintf (stderr, "new context established\n");
38        exit (EXIT_FAILURE);
39    }
40
41    ctx3 = (CUcontext) acc_get_current_cuda_context ();
42
43    if (ctx1 != ctx3)
44    {
45        fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
46        exit (EXIT_FAILURE);
47    }
48
49    return;
50}
51
52int
53main (int argc, char **argv)
54{
55    cublasStatus_t s;
56    cudaError_t e;
57    cublasHandle_t h;
58    CUcontext pctx, ctx;
59    CUresult r;
60    int dev;
61    int i;
62    const int N = 256;
63    float *h_X, *h_Y1, *h_Y2;
64    float *d_X,*d_Y;
65    float alpha = 2.0f;
66    float error_norm;
67    float ref_norm;
68
69    /* Test 1 - cuBLAS creates, OpenACC shares.  */
70
71    s = cublasCreate (&h);
72    if (s != CUBLAS_STATUS_SUCCESS)
73    {
74        fprintf (stderr, "cublasCreate failed: %d\n", s);
75        exit (EXIT_FAILURE);
76    }
77
78    r = cuCtxGetCurrent (&pctx);
79    if (r != CUDA_SUCCESS)
80    {
81        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
82        exit (EXIT_FAILURE);
83    }
84
85    e = cudaGetDevice (&dev);
86    if (e != cudaSuccess)
87    {
88        fprintf (stderr, "cudaGetDevice failed: %d\n", e);
89        exit (EXIT_FAILURE);
90    }
91
92    acc_set_device_num (dev, acc_device_nvidia);
93
94    h_X = (float *) malloc (N * sizeof (float));
95    if (!h_X)
96    {
97        fprintf (stderr, "malloc failed: for h_X\n");
98        exit (EXIT_FAILURE);
99    }
100
101    h_Y1 = (float *) malloc (N * sizeof (float));
102    if (!h_Y1)
103    {
104        fprintf (stderr, "malloc failed: for h_Y1\n");
105        exit (EXIT_FAILURE);
106    }
107
108    h_Y2 = (float *) malloc (N * sizeof (float));
109    if (!h_Y2)
110    {
111        fprintf (stderr, "malloc failed: for h_Y2\n");
112        exit (EXIT_FAILURE);
113    }
114
115    for (i = 0; i < N; i++)
116    {
117        h_X[i] = rand () / (float) RAND_MAX;
118        h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
119    }
120
121    d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
122    if (d_X == NULL)
123    {
124        fprintf (stderr, "copyin error h_X\n");
125        exit (EXIT_FAILURE);
126    }
127
128    context_check (pctx);
129
130    d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
131    if (d_Y == NULL)
132    {
133        fprintf (stderr, "copyin error h_Y1\n");
134        exit (EXIT_FAILURE);
135    }
136
137    context_check (pctx);
138
139    s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
140    if (s != CUBLAS_STATUS_SUCCESS)
141    {
142        fprintf (stderr, "cublasSaxpy failed: %d\n", s);
143        exit (EXIT_FAILURE);
144    }
145
146    context_check (pctx);
147
148    acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
149
150    context_check (pctx);
151
152    saxpy (N, alpha, h_X, h_Y2);
153
154    error_norm = 0;
155    ref_norm = 0;
156
157    for (i = 0; i < N; ++i)
158    {
159        float diff;
160
161        diff = h_Y1[i] - h_Y2[i];
162        error_norm += diff * diff;
163        ref_norm += h_Y2[i] * h_Y2[i];
164    }
165
166    error_norm = (float) sqrt ((double) error_norm);
167    ref_norm = (float) sqrt ((double) ref_norm);
168
169    if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
170    {
171        fprintf (stderr, "math error\n");
172        exit (EXIT_FAILURE);
173    }
174
175    free (h_X);
176    free (h_Y1);
177    free (h_Y2);
178
179    acc_free (d_X);
180    acc_free (d_Y);
181
182    context_check (pctx);
183
184    s = cublasDestroy (h);
185    if (s != CUBLAS_STATUS_SUCCESS)
186    {
187        fprintf (stderr, "cublasDestroy failed: %d\n", s);
188        exit (EXIT_FAILURE);
189    }
190
191    acc_shutdown (acc_device_nvidia);
192
193    r = cuCtxGetCurrent (&ctx);
194    if (r != CUDA_SUCCESS)
195    {
196        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
197        exit (EXIT_FAILURE);
198    }
199
200    if (!ctx)
201    {
202        fprintf (stderr, "Expected context\n");
203        exit (EXIT_FAILURE);
204    }
205
206    if (pctx != ctx)
207    {
208        fprintf (stderr, "Unexpected new context\n");
209        exit (EXIT_FAILURE);
210    }
211
212    return EXIT_SUCCESS;
213}
214