/* oacc-mem.c revision 1.1.1.1 */
1/* OpenACC Runtime initialization routines 2 3 Copyright (C) 2013-2015 Free Software Foundation, Inc. 4 5 Contributed by Mentor Embedded. 6 7 This file is part of the GNU Offloading and Multi Processing Library 8 (libgomp). 9 10 Libgomp is free software; you can redistribute it and/or modify it 11 under the terms of the GNU General Public License as published by 12 the Free Software Foundation; either version 3, or (at your option) 13 any later version. 14 15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 18 more details. 19 20 Under Section 7 of GPL version 3, you are granted additional 21 permissions described in the GCC Runtime Library Exception, version 22 3.1, as published by the Free Software Foundation. 23 24 You should have received a copy of the GNU General Public License and 25 a copy of the GCC Runtime Library Exception along with this program; 26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 27 <http://www.gnu.org/licenses/>. */ 28 29#include "openacc.h" 30#include "config.h" 31#include "libgomp.h" 32#include "gomp-constants.h" 33#include "oacc-int.h" 34#include "splay-tree.h" 35#include <stdint.h> 36#include <assert.h> 37 38/* Return block containing [H->S), or NULL if not contained. */ 39 40static splay_tree_key 41lookup_host (struct gomp_device_descr *dev, void *h, size_t s) 42{ 43 struct splay_tree_key_s node; 44 splay_tree_key key; 45 46 node.host_start = (uintptr_t) h; 47 node.host_end = (uintptr_t) h + s; 48 49 gomp_mutex_lock (&dev->lock); 50 key = splay_tree_lookup (&dev->mem_map, &node); 51 gomp_mutex_unlock (&dev->lock); 52 53 return key; 54} 55 56/* Return block containing [D->S), or NULL if not contained. 57 The list isn't ordered by device address, so we have to iterate 58 over the whole array. This is not expected to be a common 59 operation. 
*/ 60 61static splay_tree_key 62lookup_dev (struct target_mem_desc *tgt, void *d, size_t s) 63{ 64 int i; 65 struct target_mem_desc *t; 66 67 if (!tgt) 68 return NULL; 69 70 gomp_mutex_lock (&tgt->device_descr->lock); 71 72 for (t = tgt; t != NULL; t = t->prev) 73 { 74 if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s) 75 break; 76 } 77 78 gomp_mutex_unlock (&tgt->device_descr->lock); 79 80 if (!t) 81 return NULL; 82 83 for (i = 0; i < t->list_count; i++) 84 { 85 void * offset; 86 87 splay_tree_key k = &t->array[i].key; 88 offset = d - t->tgt_start + k->tgt_offset; 89 90 if (k->host_start + offset <= (void *) k->host_end) 91 return k; 92 } 93 94 return NULL; 95} 96 97/* OpenACC is silent on how memory exhaustion is indicated. We return 98 NULL. */ 99 100void * 101acc_malloc (size_t s) 102{ 103 if (!s) 104 return NULL; 105 106 goacc_lazy_initialize (); 107 108 struct goacc_thread *thr = goacc_thread (); 109 110 assert (thr->dev); 111 112 return thr->dev->alloc_func (thr->dev->target_id, s); 113} 114 115/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event 116 the device address is mapped. We choose to check if it mapped, 117 and if it is, to unmap it. */ 118void 119acc_free (void *d) 120{ 121 splay_tree_key k; 122 struct goacc_thread *thr = goacc_thread (); 123 124 if (!d) 125 return; 126 127 assert (thr && thr->dev); 128 129 /* We don't have to call lazy open here, as the ptr value must have 130 been returned by acc_malloc. It's not permitted to pass NULL in 131 (unless you got that null from acc_malloc). 
*/ 132 if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1))) 133 { 134 void *offset; 135 136 offset = d - k->tgt->tgt_start + k->tgt_offset; 137 138 acc_unmap_data ((void *)(k->host_start + offset)); 139 } 140 141 thr->dev->free_func (thr->dev->target_id, d); 142} 143 144void 145acc_memcpy_to_device (void *d, void *h, size_t s) 146{ 147 /* No need to call lazy open here, as the device pointer must have 148 been obtained from a routine that did that. */ 149 struct goacc_thread *thr = goacc_thread (); 150 151 assert (thr && thr->dev); 152 153 thr->dev->host2dev_func (thr->dev->target_id, d, h, s); 154} 155 156void 157acc_memcpy_from_device (void *h, void *d, size_t s) 158{ 159 /* No need to call lazy open here, as the device pointer must have 160 been obtained from a routine that did that. */ 161 struct goacc_thread *thr = goacc_thread (); 162 163 assert (thr && thr->dev); 164 165 thr->dev->dev2host_func (thr->dev->target_id, h, d, s); 166} 167 168/* Return the device pointer that corresponds to host data H. Or NULL 169 if no mapping. */ 170 171void * 172acc_deviceptr (void *h) 173{ 174 splay_tree_key n; 175 void *d; 176 void *offset; 177 178 goacc_lazy_initialize (); 179 180 struct goacc_thread *thr = goacc_thread (); 181 182 n = lookup_host (thr->dev, h, 1); 183 184 if (!n) 185 return NULL; 186 187 offset = h - n->host_start; 188 189 d = n->tgt->tgt_start + n->tgt_offset + offset; 190 191 return d; 192} 193 194/* Return the host pointer that corresponds to device data D. Or NULL 195 if no mapping. */ 196 197void * 198acc_hostptr (void *d) 199{ 200 splay_tree_key n; 201 void *h; 202 void *offset; 203 204 goacc_lazy_initialize (); 205 206 struct goacc_thread *thr = goacc_thread (); 207 208 n = lookup_dev (thr->dev->openacc.data_environ, d, 1); 209 210 if (!n) 211 return NULL; 212 213 offset = d - n->tgt->tgt_start + n->tgt_offset; 214 215 h = n->host_start + offset; 216 217 return h; 218} 219 220/* Return 1 if host data [H,+S] is present on the device. 
*/ 221 222int 223acc_is_present (void *h, size_t s) 224{ 225 splay_tree_key n; 226 227 if (!s || !h) 228 return 0; 229 230 goacc_lazy_initialize (); 231 232 struct goacc_thread *thr = goacc_thread (); 233 struct gomp_device_descr *acc_dev = thr->dev; 234 235 n = lookup_host (acc_dev, h, s); 236 237 if (n && ((uintptr_t)h < n->host_start 238 || (uintptr_t)h + s > n->host_end 239 || s > n->host_end - n->host_start)) 240 n = NULL; 241 242 return n != NULL; 243} 244 245/* Create a mapping for host [H,+S] -> device [D,+S] */ 246 247void 248acc_map_data (void *h, void *d, size_t s) 249{ 250 struct target_mem_desc *tgt; 251 size_t mapnum = 1; 252 void *hostaddrs = h; 253 void *devaddrs = d; 254 size_t sizes = s; 255 unsigned short kinds = GOMP_MAP_ALLOC; 256 257 goacc_lazy_initialize (); 258 259 struct goacc_thread *thr = goacc_thread (); 260 struct gomp_device_descr *acc_dev = thr->dev; 261 262 if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 263 { 264 if (d != h) 265 gomp_fatal ("cannot map data on shared-memory system"); 266 267 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false); 268 } 269 else 270 { 271 struct goacc_thread *thr = goacc_thread (); 272 273 if (!d || !h || !s) 274 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", 275 (void *)h, (int)s, (void *)d, (int)s); 276 277 if (lookup_host (acc_dev, h, s)) 278 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h, 279 (int)s); 280 281 if (lookup_dev (thr->dev->openacc.data_environ, d, s)) 282 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d, 283 (int)s); 284 285 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, 286 &kinds, true, false); 287 } 288 289 tgt->prev = acc_dev->openacc.data_environ; 290 acc_dev->openacc.data_environ = tgt; 291} 292 293void 294acc_unmap_data (void *h) 295{ 296 struct goacc_thread *thr = goacc_thread (); 297 struct gomp_device_descr *acc_dev = thr->dev; 298 299 /* No need to call lazy open, as the address must have been 
mapped. */ 300 301 size_t host_size; 302 splay_tree_key n = lookup_host (acc_dev, h, 1); 303 struct target_mem_desc *t; 304 305 if (!n) 306 gomp_fatal ("%p is not a mapped block", (void *)h); 307 308 host_size = n->host_end - n->host_start; 309 310 if (n->host_start != (uintptr_t) h) 311 gomp_fatal ("[%p,%d] surrounds1 %p", 312 (void *) n->host_start, (int) host_size, (void *) h); 313 314 t = n->tgt; 315 316 if (t->refcount == 2) 317 { 318 struct target_mem_desc *tp; 319 320 /* This is the last reference, so pull the descriptor off the 321 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from 322 freeing the device memory. */ 323 t->tgt_end = 0; 324 t->to_free = 0; 325 326 gomp_mutex_lock (&acc_dev->lock); 327 328 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; 329 tp = t, t = t->prev) 330 if (n->tgt == t) 331 { 332 if (tp) 333 tp->prev = t->prev; 334 else 335 acc_dev->openacc.data_environ = t->prev; 336 337 break; 338 } 339 340 gomp_mutex_unlock (&acc_dev->lock); 341 } 342 343 gomp_unmap_vars (t, true); 344} 345 346#define FLAG_PRESENT (1 << 0) 347#define FLAG_CREATE (1 << 1) 348#define FLAG_COPY (1 << 2) 349 350static void * 351present_create_copy (unsigned f, void *h, size_t s) 352{ 353 void *d; 354 splay_tree_key n; 355 356 if (!h || !s) 357 gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s); 358 359 goacc_lazy_initialize (); 360 361 struct goacc_thread *thr = goacc_thread (); 362 struct gomp_device_descr *acc_dev = thr->dev; 363 364 n = lookup_host (acc_dev, h, s); 365 if (n) 366 { 367 /* Present. 
*/ 368 d = (void *) (n->tgt->tgt_start + n->tgt_offset); 369 370 if (!(f & FLAG_PRESENT)) 371 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]", 372 (void *)h, (int)s, (void *)d, (int)s); 373 if ((h + s) > (void *)n->host_end) 374 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); 375 } 376 else if (!(f & FLAG_CREATE)) 377 { 378 gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s); 379 } 380 else 381 { 382 struct target_mem_desc *tgt; 383 size_t mapnum = 1; 384 unsigned short kinds; 385 void *hostaddrs = h; 386 387 if (f & FLAG_COPY) 388 kinds = GOMP_MAP_TO; 389 else 390 kinds = GOMP_MAP_ALLOC; 391 392 tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, 393 false); 394 395 gomp_mutex_lock (&acc_dev->lock); 396 397 d = tgt->to_free; 398 tgt->prev = acc_dev->openacc.data_environ; 399 acc_dev->openacc.data_environ = tgt; 400 401 gomp_mutex_unlock (&acc_dev->lock); 402 } 403 404 return d; 405} 406 407void * 408acc_create (void *h, size_t s) 409{ 410 return present_create_copy (FLAG_CREATE, h, s); 411} 412 413void * 414acc_copyin (void *h, size_t s) 415{ 416 return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s); 417} 418 419void * 420acc_present_or_create (void *h, size_t s) 421{ 422 return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); 423} 424 425void * 426acc_present_or_copyin (void *h, size_t s) 427{ 428 return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); 429} 430 431#define FLAG_COPYOUT (1 << 0) 432 433static void 434delete_copyout (unsigned f, void *h, size_t s) 435{ 436 size_t host_size; 437 splay_tree_key n; 438 void *d; 439 struct goacc_thread *thr = goacc_thread (); 440 struct gomp_device_descr *acc_dev = thr->dev; 441 442 n = lookup_host (acc_dev, h, s); 443 444 /* No need to call lazy open, as the data must already have been 445 mapped. 
*/ 446 447 if (!n) 448 gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s); 449 450 d = (void *) (n->tgt->tgt_start + n->tgt_offset); 451 452 host_size = n->host_end - n->host_start; 453 454 if (n->host_start != (uintptr_t) h || host_size != s) 455 gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]", 456 (void *) n->host_start, (int) host_size, (void *) h, (int) s); 457 458 if (f & FLAG_COPYOUT) 459 acc_dev->dev2host_func (acc_dev->target_id, h, d, s); 460 461 acc_unmap_data (h); 462 463 acc_dev->free_func (acc_dev->target_id, d); 464} 465 466void 467acc_delete (void *h , size_t s) 468{ 469 delete_copyout (0, h, s); 470} 471 472void acc_copyout (void *h, size_t s) 473{ 474 delete_copyout (FLAG_COPYOUT, h, s); 475} 476 477static void 478update_dev_host (int is_dev, void *h, size_t s) 479{ 480 splay_tree_key n; 481 void *d; 482 struct goacc_thread *thr = goacc_thread (); 483 struct gomp_device_descr *acc_dev = thr->dev; 484 485 n = lookup_host (acc_dev, h, s); 486 487 /* No need to call lazy open, as the data must already have been 488 mapped. 
*/ 489 490 if (!n) 491 gomp_fatal ("[%p,%d] is not mapped", h, (int)s); 492 493 d = (void *) (n->tgt->tgt_start + n->tgt_offset); 494 495 if (is_dev) 496 acc_dev->host2dev_func (acc_dev->target_id, d, h, s); 497 else 498 acc_dev->dev2host_func (acc_dev->target_id, h, d, s); 499} 500 501void 502acc_update_device (void *h, size_t s) 503{ 504 update_dev_host (1, h, s); 505} 506 507void 508acc_update_self (void *h, size_t s) 509{ 510 update_dev_host (0, h, s); 511} 512 513void 514gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, 515 void *kinds) 516{ 517 struct target_mem_desc *tgt; 518 struct goacc_thread *thr = goacc_thread (); 519 struct gomp_device_descr *acc_dev = thr->dev; 520 521 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); 522 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, 523 NULL, sizes, kinds, true, false); 524 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); 525 tgt->prev = acc_dev->openacc.data_environ; 526 acc_dev->openacc.data_environ = tgt; 527} 528 529void 530gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum) 531{ 532 struct goacc_thread *thr = goacc_thread (); 533 struct gomp_device_descr *acc_dev = thr->dev; 534 splay_tree_key n; 535 struct target_mem_desc *t; 536 int minrefs = (mapnum == 1) ? 2 : 3; 537 538 n = lookup_host (acc_dev, h, 1); 539 540 if (!n) 541 gomp_fatal ("%p is not a mapped block", (void *)h); 542 543 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); 544 545 t = n->tgt; 546 547 struct target_mem_desc *tp; 548 549 gomp_mutex_lock (&acc_dev->lock); 550 551 if (t->refcount == minrefs) 552 { 553 /* This is the last reference, so pull the descriptor off the 554 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from 555 freeing the device memory. 
*/ 556 t->tgt_end = 0; 557 t->to_free = 0; 558 559 for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL; 560 tp = t, t = t->prev) 561 { 562 if (n->tgt == t) 563 { 564 if (tp) 565 tp->prev = t->prev; 566 else 567 acc_dev->openacc.data_environ = t->prev; 568 break; 569 } 570 } 571 } 572 573 if (force_copyfrom) 574 t->list[0]->copy_from = 1; 575 576 gomp_mutex_unlock (&acc_dev->lock); 577 578 /* If running synchronously, unmap immediately. */ 579 if (async < acc_async_noval) 580 gomp_unmap_vars (t, true); 581 else 582 { 583 gomp_copy_from_async (t); 584 acc_dev->openacc.register_async_cleanup_func (t); 585 } 586 587 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); 588} 589