1/* Plugin for offload execution on Intel MIC devices.
2
3   Copyright (C) 2014 Free Software Foundation, Inc.
4
5   Contributed by Ilya Verbin <ilya.verbin@intel.com>.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29/* Host side part of a libgomp plugin.  */
30
31#include <stdint.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include <string.h>
35#include <utility>
36#include <vector>
37#include <map>
38#include "libgomp-plugin.h"
39#include "compiler_if_host.h"
40#include "main_target_image.h"
41
42#define LD_LIBRARY_PATH_ENV	"LD_LIBRARY_PATH"
43#define MIC_LD_LIBRARY_PATH_ENV	"MIC_LD_LIBRARY_PATH"
44
/* Debug tracing.  With DEBUG defined, TRACE (fmt, ...) prints the file and
   function name followed by the printf-style message and a newline to
   stderr.  Without DEBUG it expands to an empty statement and its arguments
   are discarded without being evaluated.

   Both variants are function-like and wrapped in do { } while (0), so that
   "TRACE (...);" is exactly one statement and can be used safely as the
   unbraced body of an if/else.  */
#ifdef DEBUG
#define TRACE(...)						    \
  do								    \
    {								    \
      fprintf (stderr, "HOST:\t%s:%s ", __FILE__, __FUNCTION__);    \
      fprintf (stderr, __VA_ARGS__);				    \
      fprintf (stderr, "\n");					    \
    }								    \
  while (0)
#else
#define TRACE(...) do { } while (0)
#endif
55
56
/* Start/end addresses of functions and global variables on a device.
   One addr_pair per function or variable of an image.  */
typedef std::vector<addr_pair> AddrVect;

/* Addresses for one image and all devices; indexed by device number.  */
typedef std::vector<AddrVect> DevAddrVect;

/* Addresses for all images and all devices; keyed by the image pointer
   that is passed to GOMP_OFFLOAD_load_image.  */
typedef std::map<void *, DevAddrVect> ImgDevAddrMap;


/* Total number of available devices.  Set in init () from
   _Offload_number_of_devices ().  */
static int num_devices;

/* Total number of shared libraries with offloading to Intel MIC.
   Incremented by offload_image () for every image it registers; the
   previous value is used to generate that image's library name.  */
static int num_images;

/* Two dimensional array: one key is a pointer to image,
   second key is number of device.  Contains a vector of pointer pairs.
   Allocated in init ().  */
static ImgDevAddrMap *address_table;

/* Thread-safe registration of the main image: pthread_once control used by
   GOMP_OFFLOAD_init_device () to call register_main_image () only once.  */
static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
79
/* Template variable descriptor with the "in" direction flag set and "out"
   cleared.  Functions in this file copy it, fill in the ptr and size fields
   of the copy and pass the copy to offload ().
   NOTE(review): judging by the name this describes data sent from the host
   to the target; the exact meaning of the remaining fields is defined by
   liboffloadmic -- confirm there.  */
static VarDesc vd_host2tgt = {
  { 1, 1 },		      /* dst, src			      */
  { 1, 0 },		      /* in, out			      */
  1,			      /* alloc_if			      */
  1,			      /* free_if			      */
  4,			      /* align				      */
  0,			      /* mic_offset			      */
  { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
				 is_stack_buf, sink_addr, alloc_disp,
				 is_noncont_src, is_noncont_dst	      */
  0,			      /* offset				      */
  0,			      /* size				      */
  1,			      /* count				      */
  0,			      /* alloc				      */
  0,			      /* into				      */
  0			      /* ptr				      */
};
97
/* Template variable descriptor with the "out" direction flag set and "in"
   cleared; otherwise identical to vd_host2tgt.  Functions in this file copy
   it, fill in the ptr and size fields of the copy and pass the copy to
   offload ().
   NOTE(review): judging by the name this describes data received from the
   target into host memory; the exact meaning of the remaining fields is
   defined by liboffloadmic -- confirm there.  */
static VarDesc vd_tgt2host = {
  { 1, 1 },		      /* dst, src			      */
  { 0, 1 },		      /* in, out			      */
  1,			      /* alloc_if			      */
  1,			      /* free_if			      */
  4,			      /* align				      */
  0,			      /* mic_offset			      */
  { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length,
				 is_stack_buf, sink_addr, alloc_disp,
				 is_noncont_src, is_noncont_dst	      */
  0,			      /* offset				      */
  0,			      /* size				      */
  1,			      /* count				      */
  0,			      /* alloc				      */
  0,			      /* into				      */
  0			      /* ptr				      */
};
115
116
117/* Add path specified in LD_LIBRARY_PATH to MIC_LD_LIBRARY_PATH, which is
118   required by liboffloadmic.  */
119__attribute__((constructor))
120static void
121init (void)
122{
123  const char *ld_lib_path = getenv (LD_LIBRARY_PATH_ENV);
124  const char *mic_lib_path = getenv (MIC_LD_LIBRARY_PATH_ENV);
125
126  if (!ld_lib_path)
127    goto out;
128
129  if (!mic_lib_path)
130    setenv (MIC_LD_LIBRARY_PATH_ENV, ld_lib_path, 1);
131  else
132    {
133      size_t len = strlen (mic_lib_path) + strlen (ld_lib_path) + 2;
134      bool use_alloca = len <= 2048;
135      char *mic_lib_path_new = (char *) (use_alloca ? alloca (len)
136						    : malloc (len));
137      if (!mic_lib_path_new)
138	{
139	  fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
140	  exit (1);
141	}
142
143      sprintf (mic_lib_path_new, "%s:%s", mic_lib_path, ld_lib_path);
144      setenv (MIC_LD_LIBRARY_PATH_ENV, mic_lib_path_new, 1);
145
146      if (!use_alloca)
147	free (mic_lib_path_new);
148    }
149
150out:
151  address_table = new ImgDevAddrMap;
152  num_devices = _Offload_number_of_devices ();
153}
154
155extern "C" const char *
156GOMP_OFFLOAD_get_name (void)
157{
158  const char *res = "intelmic";
159  TRACE ("(): return %s", res);
160  return res;
161}
162
163extern "C" unsigned int
164GOMP_OFFLOAD_get_caps (void)
165{
166  unsigned int res = GOMP_OFFLOAD_CAP_OPENMP_400;
167  TRACE ("(): return %x", res);
168  return res;
169}
170
171extern "C" enum offload_target_type
172GOMP_OFFLOAD_get_type (void)
173{
174  enum offload_target_type res = OFFLOAD_TARGET_TYPE_INTEL_MIC;
175  TRACE ("(): return %d", res);
176  return res;
177}
178
179extern "C" int
180GOMP_OFFLOAD_get_num_devices (void)
181{
182  TRACE ("(): return %d", num_devices);
183  return num_devices;
184}
185
186static void
187offload (const char *file, uint64_t line, int device, const char *name,
188	 int num_vars, VarDesc *vars, VarDesc2 *vars2)
189{
190  OFFLOAD ofld = __offload_target_acquire1 (&device, file, line);
191  if (ofld)
192    __offload_offload1 (ofld, name, 0, num_vars, vars, vars2, 0, NULL, NULL);
193  else
194    {
195      fprintf (stderr, "%s:%d: Offload target acquire failed\n", file, line);
196      exit (1);
197    }
198}
199
200static void
201register_main_image ()
202{
203  __offload_register_image (&main_target_image);
204}
205
206/* liboffloadmic loads and runs offload_target_main on all available devices
207   during a first call to offload ().  */
208extern "C" void
209GOMP_OFFLOAD_init_device (int device)
210{
211  TRACE ("");
212  pthread_once (&main_image_is_registered, register_main_image);
213  offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
214	   NULL, NULL);
215}
216
217extern "C" void
218GOMP_OFFLOAD_fini_device (int device)
219{
220  TRACE ("");
221  /* Unreachable for GOMP_OFFLOAD_CAP_OPENMP_400.  */
222  abort ();
223}
224
225static void
226get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
227{
228  VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host };
229  vd1[0].ptr = &num_funcs;
230  vd1[0].size = sizeof (num_funcs);
231  vd1[1].ptr = &num_vars;
232  vd1[1].size = sizeof (num_vars);
233  VarDesc2 vd1g[2] = { { "num_funcs", 0 }, { "num_vars", 0 } };
234
235  offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
236	   vd1, vd1g);
237
238  int table_size = num_funcs + 2 * num_vars;
239  if (table_size > 0)
240    {
241      table = new void * [table_size];
242
243      VarDesc vd2;
244      vd2 = vd_tgt2host;
245      vd2.ptr = table;
246      vd2.size = table_size * sizeof (void *);
247      VarDesc2 vd2g = { "table", 0 };
248
249      offload (__FILE__, __LINE__, device, "__offload_target_table_p2", 1,
250	       &vd2, &vd2g);
251    }
252}
253
254/* Offload TARGET_IMAGE to all available devices and fill address_table with
255   corresponding target addresses.  */
256
257static void
258offload_image (void *target_image)
259{
260  struct TargetImage {
261    int64_t size;
262    /* 10 characters is enough for max int value.  */
263    char name[sizeof ("lib0000000000.so")];
264    char data[];
265  } __attribute__ ((packed));
266
267  void *image_start = ((void **) target_image)[0];
268  void *image_end   = ((void **) target_image)[1];
269
270  TRACE ("(target_image = %p { %p, %p })",
271	 target_image, image_start, image_end);
272
273  int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start;
274  TargetImage *image
275    = (TargetImage *) malloc (sizeof (int64_t) + sizeof ("lib0000000000.so")
276			      + image_size);
277  if (!image)
278    {
279      fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
280      exit (1);
281    }
282
283  image->size = image_size;
284  sprintf (image->name, "lib%010d.so", num_images++);
285  memcpy (image->data, image_start, image->size);
286
287  TRACE ("() __offload_register_image %s { %p, %d }",
288	 image->name, image_start, image->size);
289  __offload_register_image (image);
290
291  /* Receive tables for target_image from all devices.  */
292  DevAddrVect dev_table;
293  for (int dev = 0; dev < num_devices; dev++)
294    {
295      int num_funcs = 0;
296      int num_vars = 0;
297      void **table = NULL;
298
299      get_target_table (dev, num_funcs, num_vars, table);
300
301      AddrVect curr_dev_table;
302
303      for (int i = 0; i < num_funcs; i++)
304	{
305	  addr_pair tgt_addr;
306	  tgt_addr.start = (uintptr_t) table[i];
307	  tgt_addr.end = tgt_addr.start + 1;
308	  TRACE ("() func %d:\t0x%llx..0x%llx", i,
309		 tgt_addr.start, tgt_addr.end);
310	  curr_dev_table.push_back (tgt_addr);
311	}
312
313      for (int i = 0; i < num_vars; i++)
314	{
315	  addr_pair tgt_addr;
316	  tgt_addr.start = (uintptr_t) table[num_funcs+i*2];
317	  tgt_addr.end = tgt_addr.start + (uintptr_t) table[num_funcs+i*2+1];
318	  TRACE ("() var %d:\t0x%llx..0x%llx", i, tgt_addr.start, tgt_addr.end);
319	  curr_dev_table.push_back (tgt_addr);
320	}
321
322      dev_table.push_back (curr_dev_table);
323    }
324
325  address_table->insert (std::make_pair (target_image, dev_table));
326
327  free (image);
328}
329
330extern "C" int
331GOMP_OFFLOAD_load_image (int device, void *target_image, addr_pair **result)
332{
333  TRACE ("(device = %d, target_image = %p)", device, target_image);
334
335  /* If target_image is already present in address_table, then there is no need
336     to offload it.  */
337  if (address_table->count (target_image) == 0)
338    offload_image (target_image);
339
340  AddrVect *curr_dev_table = &(*address_table)[target_image][device];
341  int table_size = curr_dev_table->size ();
342  addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
343  if (table == NULL)
344    {
345      fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
346      exit (1);
347    }
348
349  std::copy (curr_dev_table->begin (), curr_dev_table->end (), table);
350  *result = table;
351  return table_size;
352}
353
354extern "C" void
355GOMP_OFFLOAD_unload_image (int device, void *target_image)
356{
357  TRACE ("(device = %d, target_image = %p)", device, target_image);
358
359  /* TODO: Currently liboffloadmic doesn't support __offload_unregister_image
360     for libraries.  */
361
362  address_table->erase (target_image);
363}
364
365extern "C" void *
366GOMP_OFFLOAD_alloc (int device, size_t size)
367{
368  TRACE ("(size = %d)", size);
369
370  void *tgt_ptr;
371  VarDesc vd1[2] = { vd_host2tgt, vd_tgt2host };
372  vd1[0].ptr = &size;
373  vd1[0].size = sizeof (size);
374  vd1[1].ptr = &tgt_ptr;
375  vd1[1].size = sizeof (void *);
376  VarDesc2 vd1g[2] = { { "size", 0 }, { "tgt_ptr", 0 } };
377
378  offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd1, vd1g);
379
380  return tgt_ptr;
381}
382
383extern "C" void
384GOMP_OFFLOAD_free (int device, void *tgt_ptr)
385{
386  TRACE ("(tgt_ptr = %p)", tgt_ptr);
387
388  VarDesc vd1 = vd_host2tgt;
389  vd1.ptr = &tgt_ptr;
390  vd1.size = sizeof (void *);
391  VarDesc2 vd1g = { "tgt_ptr", 0 };
392
393  offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd1, &vd1g);
394}
395
396extern "C" void *
397GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
398		       size_t size)
399{
400  TRACE ("(tgt_ptr = %p, host_ptr = %p, size = %d)", tgt_ptr, host_ptr, size);
401  if (!size)
402    return tgt_ptr;
403
404  VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
405  vd1[0].ptr = &tgt_ptr;
406  vd1[0].size = sizeof (void *);
407  vd1[1].ptr = &size;
408  vd1[1].size = sizeof (size);
409  VarDesc2 vd1g[2] = { { "tgt_ptr", 0 }, { "size", 0 } };
410
411  offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
412	   vd1, vd1g);
413
414  VarDesc vd2 = vd_host2tgt;
415  vd2.ptr = (void *) host_ptr;
416  vd2.size = size;
417  VarDesc2 vd2g = { "var", 0 };
418
419  offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
420	   &vd2, &vd2g);
421
422  return tgt_ptr;
423}
424
425extern "C" void *
426GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
427		       size_t size)
428{
429  TRACE ("(host_ptr = %p, tgt_ptr = %p, size = %d)", host_ptr, tgt_ptr, size);
430  if (!size)
431    return host_ptr;
432
433  VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
434  vd1[0].ptr = &tgt_ptr;
435  vd1[0].size = sizeof (void *);
436  vd1[1].ptr = &size;
437  vd1[1].size = sizeof (size);
438  VarDesc2 vd1g[2] = { { "tgt_ptr", 0 }, { "size", 0 } };
439
440  offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
441	   vd1, vd1g);
442
443  VarDesc vd2 = vd_tgt2host;
444  vd2.ptr = (void *) host_ptr;
445  vd2.size = size;
446  VarDesc2 vd2g = { "var", 0 };
447
448  offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
449	   &vd2, &vd2g);
450
451  return host_ptr;
452}
453
454extern "C" void
455GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
456{
457  TRACE ("(tgt_fn = %p, tgt_vars = %p)", tgt_fn, tgt_vars);
458
459  VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
460  vd1[0].ptr = &tgt_fn;
461  vd1[0].size = sizeof (void *);
462  vd1[1].ptr = &tgt_vars;
463  vd1[1].size = sizeof (void *);
464  VarDesc2 vd1g[2] = { { "tgt_fn", 0 }, { "tgt_vars", 0 } };
465
466  offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd1, vd1g);
467}
468