1/* { dg-do run { target openacc_nvidia_accel_selected } } */ 2/* { dg-additional-options "-lcuda" } */ 3 4#include <stdio.h> 5#include <stdlib.h> 6#include <unistd.h> 7#include <openacc.h> 8#include <cuda.h> 9 10int 11main (int argc, char **argv) 12{ 13 CUdevice dev; 14 CUfunction delay2; 15 CUmodule module; 16 CUresult r; 17 int N; 18 int i; 19 CUstream *streams; 20 unsigned long **a, **d_a, *tid, ticks; 21 int nbytes; 22 void *kargs[3]; 23 int clkrate; 24 int devnum, nprocs; 25 26 acc_init (acc_device_nvidia); 27 28 devnum = acc_get_device_num (acc_device_nvidia); 29 30 r = cuDeviceGet (&dev, devnum); 31 if (r != CUDA_SUCCESS) 32 { 33 fprintf (stderr, "cuDeviceGet failed: %d\n", r); 34 abort (); 35 } 36 37 r = 38 cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, 39 dev); 40 if (r != CUDA_SUCCESS) 41 { 42 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); 43 abort (); 44 } 45 46 r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); 47 if (r != CUDA_SUCCESS) 48 { 49 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); 50 abort (); 51 } 52 53 r = cuModuleLoad (&module, "subr.ptx"); 54 if (r != CUDA_SUCCESS) 55 { 56 fprintf (stderr, "cuModuleLoad failed: %d\n", r); 57 abort (); 58 } 59 60 r = cuModuleGetFunction (&delay2, module, "delay2"); 61 if (r != CUDA_SUCCESS) 62 { 63 fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); 64 abort (); 65 } 66 67 nbytes = sizeof (int); 68 69 ticks = (unsigned long) (200.0 * clkrate); 70 71 N = nprocs; 72 73 streams = (CUstream *) malloc (N * sizeof (void *)); 74 75 a = (unsigned long **) malloc (N * sizeof (unsigned long *)); 76 d_a = (unsigned long **) malloc (N * sizeof (unsigned long *)); 77 tid = (unsigned long *) malloc (N * sizeof (unsigned long)); 78 79 for (i = 0; i < N; i++) 80 { 81 a[i] = (unsigned long *) malloc (sizeof (unsigned long)); 82 *a[i] = N; 83 d_a[i] = (unsigned long *) acc_malloc (nbytes); 84 tid[i] = i; 85 86 acc_map_data (a[i], d_a[i], nbytes); 87 88 streams[i] = (CUstream) acc_get_cuda_stream (i); 89 if (streams[i] != NULL) 90 abort (); 91 92 r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); 93 if (r != CUDA_SUCCESS) 94 { 95 fprintf (stderr, "cuStreamCreate failed: %d\n", r); 96 abort (); 97 } 98 99 if (!acc_set_cuda_stream (i, streams[i])) 100 abort (); 101 } 102 103 for (i = 0; i < N; i++) 104 { 105 kargs[0] = (void *) &d_a[i]; 106 kargs[1] = (void *) &ticks; 107 kargs[2] = (void *) &tid[i]; 108 109 r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); 110 if (r != CUDA_SUCCESS) 111 { 112 fprintf (stderr, "cuLaunchKernel failed: %d\n", r); 113 abort (); 114 } 115 116 ticks = (unsigned long) (50.0 * clkrate); 117 } 118 119 acc_wait_all_async (0); 120 121 for (i = 0; i < N; i++) 122 { 123 acc_copyout (a[i], nbytes); 124 if (*a[i] != i) 125 abort (); 126 } 127 128 free (streams); 129 130 for (i = 0; i < N; i++) 131 { 132 free (a[i]); 133 } 134 135 free (a); 136 free (d_a); 137 free (tid); 138 139 acc_shutdown (acc_device_nvidia); 140 141 exit (0); 142} 143 144/* { dg-output "" } */ 145