/* oacc-mem.c revision 1.1.1.3 */
/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2016 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <stdint.h>
#include <assert.h>

/* Return block containing [H->S), or NULL if not contained.  The device lock
   for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  /* Build a probe key spanning [h, h+s) and look it up in the device's
     host-address-ordered splay tree.  */
  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return block containing [D->S), or NULL if not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on entry, and
   remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  /* First, find the target memory descriptor whose device range
     [tgt_start, tgt_end] covers [d, d+s).  */
  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
	break;
    }

  if (!t)
    return NULL;

  /* Then scan that descriptor's mapping list for the entry covering D.
     NOTE(review): this test only bounds the upper end against host_end;
     presumably the descriptor-level range check above is relied on for the
     lower bound — confirm against later upstream revisions.  */
  for (i = 0; i < t->list_count; i++)
    {
      void * offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
	return k;
    }

  return NULL;
}

/* Allocate S bytes on the current device and return the device pointer.
   OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  /* Zero-size requests yield NULL rather than a device allocation.  */
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  return thr->dev->alloc_func (thr->dev->target_id, s);
}

/* Free device memory D previously returned by acc_malloc.
   OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped.  We choose to check if it mapped,
   and if it is, to unmap it.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      /* Translate the device address back to the corresponding host
	 address so the existing mapping can be torn down first.  */
      offset = d - k->tgt->tgt_start + k->tgt_offset;

      /* Drop the lock before re-entering the mapping machinery.  */
      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  acc_dev->free_func (acc_dev->target_id, d);
}

/* Copy S bytes from host address H to device address D.  */
void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
}

/* Copy S bytes from device address D to host address H.  */
void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
}

/* Return the device pointer that corresponds to host data H.  Or NULL
   if no mapping.  */

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  /* H may point into the middle of a mapped block; preserve the offset.  */
  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D.  Or NULL
   if no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  /* Mirror of acc_deviceptr: translate device offset back to host.  */
  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}

/* Return 1 if host data [H,+S] is present on the device.  A NULL pointer
   or zero size is never considered present.  */

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* The lookup may return a block that only partially overlaps [h, h+s);
     require full containment before reporting presence.  */
  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}

/* Create a mapping for host [H,+S] -> device [D,+S].  Fatal error if either
   range is already mapped.  */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* On a shared-memory device the host and device addresses must
	 coincide; no real mapping is created.  */
      if (d != h)
	gomp_fatal ("cannot map data on shared-memory system");

      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
			   GOMP_MAP_VARS_OPENACC);
    }
  else
    {
      /* Shadows the outer THR with the same value.  */
      struct goacc_thread *thr = goacc_thread ();

      if (!d || !h || !s)
	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                    (void *)h, (int)s, (void *)d, (int)s);

      gomp_mutex_lock (&acc_dev->lock);

      if (lookup_host (acc_dev, h, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
		      (int)s);
	}

      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
		      (int)s);
	}

      /* Release the lock before calling into gomp_map_vars, which takes
	 the device lock itself.  */
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
			   &kinds, true, GOMP_MAP_VARS_OPENACC);
    }

  /* Push the new descriptor onto the device's data environment chain.  */
  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

/* Remove the mapping whose host range starts exactly at H.  Fatal error
   if H is unmapped or points into the middle of a mapped block.  */
void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  /* NOTE(review): refcount == 2 appears to mean the only remaining holders
     are the data_environ chain and this mapping itself — confirm against
     target.c's refcount conventions.  */
  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}

/* Flag bits for present_create_copy: tolerate an existing mapping, create a
   new mapping, and copy host data to the device on creation.  */
#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

/* Shared worker for acc_create / acc_copyin / acc_present_or_create /
   acc_present_or_copyin.  Returns the device address for [H,+S].  */
static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      /* An existing mapping is only acceptable for the present_or_*
	 variants.  */
      if (!(f & FLAG_PRESENT))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
        	      (void *)h, (int)s, (void *)d, (int)s);
	}
      /* The requested range must not extend past the existing block.  */
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      /* GOMP_MAP_TO also copies host contents; GOMP_MAP_ALLOC only
	 reserves device storage.  */
      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      /* Drop the lock across gomp_map_vars, which locks internally.  */
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
			   GOMP_MAP_VARS_OPENACC);

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}

/* Allocate device storage for [H,+S] without copying.  */
void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE, h, s);
}

/* Allocate device storage for [H,+S] and copy the host data in.  */
void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
}

/* As acc_create, but tolerate an existing mapping.  */
void *
acc_present_or_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
}

/* As acc_copyin, but tolerate an existing mapping.  */
void *
acc_present_or_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
}

/* Flag bit for delete_copyout: copy device data back before unmapping.  */
#define FLAG_COPYOUT (1 << 0)

/* Shared worker for acc_delete / acc_copyout: unmap [H,+S], optionally
   copying device contents back to the host first.  The range must match
   the existing mapping exactly.  */
static void
delete_copyout (unsigned f, void *h, size_t s)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  acc_dev->free_func (acc_dev->target_id, d);
}

/* Unmap [H,+S] without copying back.  */
void
acc_delete (void *h , size_t s)
{
  delete_copyout (0, h, s);
}

/* Copy [H,+S] back from the device, then unmap it.  */
void acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s);
}

/* Shared worker for acc_update_device / acc_update_self: refresh one side
   of an existing mapping.  IS_DEV nonzero copies host -> device.  */
static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  gomp_mutex_unlock (&acc_dev->lock);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
}

/* Refresh the device copy of [H,+S] from the host.  */
void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}

/* Refresh the host copy of [H,+S] from the device.  */
void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}

/* Map MAPNUM pointer clauses (for compiler-generated enter-data style
   constructs) and push the resulting descriptor onto the data
   environment chain.  */
void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
			 void *kinds)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

/* Undo gomp_acc_insert_pointer for host address H.  FORCE_COPYFROM forces
   a copy back to the host; ASYNC selects synchronous or asynchronous
   teardown; MAPNUM is the original clause count.  */
void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  /* NOTE(review): the expected remaining refcount depends on how many
     clauses were mapped — confirm against the matching insert path.  */
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    {
      /* Defer the copy-back and cleanup to the async queue.  */
      gomp_copy_from_async (t);
      acc_dev->openacc.register_async_cleanup_func (t);
    }

  gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
}