/* oacc-mem.c revision 1.1.1.4 (vendor import metadata).  */
/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2017 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Return block containing [H->S), or NULL if not contained.  The device lock
   for DEV must be locked on entry, and remains locked on exit.  */

static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
{
  struct splay_tree_key_s node;
  splay_tree_key key;

  /* Probe the device's host-address splay tree with the half-open
     range [h, h+s).  */
  node.host_start = (uintptr_t) h;
  node.host_end = (uintptr_t) h + s;

  key = splay_tree_lookup (&dev->mem_map, &node);

  return key;
}

/* Return block containing [D->S), or NULL if not contained.
   The list isn't ordered by device address, so we have to iterate
   over the whole array.  This is not expected to be a common
   operation.  The device lock associated with TGT must be locked on entry, and
   remains locked on exit.  */

static splay_tree_key
lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
{
  int i;
  struct target_mem_desc *t;

  if (!tgt)
    return NULL;

  /* First, walk the chain of target memory descriptors to find the one
     whose device range [tgt_start, tgt_end) covers [d, d+s).  */
  for (t = tgt; t != NULL; t = t->prev)
    {
      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
        break;
    }

  if (!t)
    return NULL;

  /* Then scan that descriptor's key array for a matching entry.
     NOTE(review): the comparison below mixes GNU void* arithmetic with
     integer fields; confirm it selects the intended key rather than
     merely the first one whose range test happens to pass.  */
  for (i = 0; i < t->list_count; i++)
    {
      void * offset;

      splay_tree_key k = &t->array[i].key;
      offset = d - t->tgt_start + k->tgt_offset;

      if (k->host_start + offset <= (void *) k->host_end)
        return k;
    }

  return NULL;
}

/* OpenACC is silent on how memory exhaustion is indicated.  We return
   NULL.  */

void *
acc_malloc (size_t s)
{
  if (!s)
    return NULL;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();

  assert (thr->dev);

  /* On shared-memory targets, device memory is plain host memory.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (s);

  return thr->dev->alloc_func (thr->dev->target_id, s);
}

/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
   the device address is mapped. We choose to check if it mapped,
   and if it is, to unmap it.  */
void
acc_free (void *d)
{
  splay_tree_key k;

  if (!d)
    return;

  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return free (d);

  gomp_mutex_lock (&acc_dev->lock);

  /* We don't have to call lazy open here, as the ptr value must have
     been returned by acc_malloc.  It's not permitted to pass NULL in
     (unless you got that null from acc_malloc).  */
  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
    {
      void *offset;

      /* Translate the device address back to the corresponding host
         address so the mapping can be removed before the memory is
         released.  */
      offset = d - k->tgt->tgt_start + k->tgt_offset;

      /* Drop the lock before acc_unmap_data, which takes it itself.  */
      gomp_mutex_unlock (&acc_dev->lock);

      acc_unmap_data ((void *)(k->host_start + offset));
    }
  else
    gomp_mutex_unlock (&acc_dev->lock);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
}

/* Copy S bytes from host address H to device address D.  */

void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared memory: a host-side (possibly overlapping) copy suffices.  */
      memmove (d, h, s);
      return;
    }

  if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
    gomp_fatal ("error in %s", __FUNCTION__);
}

/* Copy S bytes from device address D to host address H.  */

void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
  /* No need to call lazy open here, as the device pointer must have
     been obtained from a routine that did that.  */
  struct goacc_thread *thr = goacc_thread ();

  assert (thr && thr->dev);

  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      memmove (h, d, s);
      return;
    }

  if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
    gomp_fatal ("error in %s", __FUNCTION__);
}

/* Return the device pointer that corresponds to host data H.  Or NULL
   if no mapping.
*/

void *
acc_deviceptr (void *h)
{
  splay_tree_key n;
  void *d;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *dev = thr->dev;

  /* On shared-memory targets the host pointer is the device pointer.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&dev->lock);

  n = lookup_host (dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&dev->lock);
      return NULL;
    }

  /* H may point into the middle of a mapped block; preserve the
     intra-block offset when translating.  */
  offset = h - n->host_start;

  d = n->tgt->tgt_start + n->tgt_offset + offset;

  gomp_mutex_unlock (&dev->lock);

  return d;
}

/* Return the host pointer that corresponds to device data D.  Or NULL
   if no mapping.  */

void *
acc_hostptr (void *d)
{
  splay_tree_key n;
  void *h;
  void *offset;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* On shared-memory targets the device pointer is the host pointer.  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return d;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      return NULL;
    }

  /* Translate back using the same intra-block offset arithmetic as the
     device-to-host lookup above.  */
  offset = d - n->tgt->tgt_start + n->tgt_offset;

  h = n->host_start + offset;

  gomp_mutex_unlock (&acc_dev->lock);

  return h;
}

/* Return 1 if host data [H,+S] is present on the device.
*/

int
acc_is_present (void *h, size_t s)
{
  splay_tree_key n;

  if (!s || !h)
    return 0;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* On shared-memory targets every (non-NULL) host pointer is "present".  */
  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h != NULL;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* A key was found, but it must fully enclose the queried range; a
     partial overlap does not count as "present".  */
  if (n && ((uintptr_t)h < n->host_start
	    || (uintptr_t)h + s > n->host_end
	    || s > n->host_end - n->host_start))
    n = NULL;

  gomp_mutex_unlock (&acc_dev->lock);

  return n != NULL;
}

/* Create a mapping for host [H,+S] -> device [D,+S] */

void
acc_map_data (void *h, void *d, size_t s)
{
  struct target_mem_desc *tgt = NULL;
  size_t mapnum = 1;
  void *hostaddrs = h;
  void *devaddrs = d;
  size_t sizes = s;
  unsigned short kinds = GOMP_MAP_ALLOC;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared memory: only the identity mapping makes sense.
	 NOTE(review): on this path TGT stays NULL, yet it is dereferenced
	 unconditionally below (tgt->prev) — looks like a latent NULL
	 dereference on shared-memory targets; confirm against upstream.  */
      if (d != h)
        gomp_fatal ("cannot map data on shared-memory system");
    }
  else
    {
      struct goacc_thread *thr = goacc_thread ();

      if (!d || !h || !s)
        gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                    (void *)h, (int)s, (void *)d, (int)s);

      gomp_mutex_lock (&acc_dev->lock);

      /* Reject double-mapping of either side of the pair.  */
      if (lookup_host (acc_dev, h, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
		      (int)s);
	}

      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
		      (int)s);
	}

      /* gomp_map_vars takes the device lock itself, so release it first.  */
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
			   &kinds, true, GOMP_MAP_VARS_OPENACC);
    }

  /* Thread the new descriptor onto the head of the data environment list.  */
  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

/* Remove the mapping whose host side starts exactly at H.  H must be the
   start of a mapped block; pointing into the middle is fatal.  */

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  /* This is a no-op on shared-memory targets.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  /* NOTE(review): refcount == 2 presumably means one reference from the
     mapping itself plus one held by the data_environ chain — confirm
     against gomp_map_vars' refcounting.  */
  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  gomp_unmap_vars (t, true);
}

/* Flag bits for present_create_copy below.  */
#define FLAG_PRESENT (1 << 0)
#define FLAG_CREATE (1 << 1)
#define FLAG_COPY (1 << 2)

/* Shared worker for acc_create / acc_copyin / acc_present_or_*:
   look H up, and depending on F either require it present, create a new
   mapping, and/or copy the host data to the device.  Returns the device
   address for H.  */

static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return h;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present.  */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      if (!(f & FLAG_PRESENT))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
	      	      (void *)h, (int)s, (void *)d, (int)s);
        }
      /* The existing mapping must cover the whole requested range.  */
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      /* GOMP_MAP_TO also copies the host data; GOMP_MAP_ALLOC only
	 allocates device storage.  */
      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      /* gomp_map_vars takes the device lock itself.  */
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
			   GOMP_MAP_VARS_OPENACC);

      gomp_mutex_lock (&acc_dev->lock);

      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}

void *
acc_create (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE, h, s);
}

void *
acc_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
}

void *
acc_present_or_create (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
}

void *
acc_present_or_copyin (void *h, size_t s)
{
  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
}

#define FLAG_COPYOUT (1 << 0)

/* Shared worker for acc_delete / acc_copyout: unmap the block that maps
   exactly [H,+S], optionally (FLAG_COPYOUT) copying the device data back
   to the host first, then free the device memory.  LIBFNNAME is the
   public entry point's name, used in diagnostics.  */

static void
delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
{
  size_t host_size;
  splay_tree_key n;
  void *d;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  /* No need to call lazy open, as the data must already have been
     mapped.  */

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  host_size = n->host_end - n->host_start;

  /* Only an exact match may be deleted; a sub-range is fatal.  */
  if (n->host_start != (uintptr_t) h || host_size != s)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
    }

  gomp_mutex_unlock (&acc_dev->lock);

  if (f & FLAG_COPYOUT)
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  acc_unmap_data (h);

  if (!acc_dev->free_func (acc_dev->target_id, d))
    gomp_fatal ("error in freeing device memory in %s", libfnname);
}

void
acc_delete (void *h , size_t s)
{
  delete_copyout (0, h, s, __FUNCTION__);
}

void
acc_copyout (void *h, size_t s)
{
  delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
}

/* Shared worker for acc_update_device / acc_update_self: refresh either
   the device copy (IS_DEV nonzero) or the host copy (IS_DEV zero) of the
   mapped range [H,+S].  */

static void
update_dev_host (int is_dev, void *h, size_t s)
{
  splay_tree_key n;
  void *d;

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* Host and device memory are the same on shared-memory targets;
     nothing to update.  */
  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
    }

  d = (void *) (n->tgt->tgt_start + n->tgt_offset
		+ (uintptr_t) h - n->host_start);

  if (is_dev)
    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
  else
    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);

  gomp_mutex_unlock (&acc_dev->lock);
}

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}

/* Map MAPNUM host addresses (for compiler-generated enter-data style
   constructs) and link the resulting descriptor into the device's
   OpenACC data environment.  */

void
gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
			 void *kinds)
{
  struct target_mem_desc *tgt;
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);

  gomp_mutex_lock (&acc_dev->lock);
  tgt->prev = acc_dev->openacc.data_environ;
  acc_dev->openacc.data_environ = tgt;
  gomp_mutex_unlock (&acc_dev->lock);
}

/* Undo a gomp_acc_insert_pointer mapping for host address H.
   FORCE_COPYFROM forces the data to be copied back to the host on unmap.
   ASYNC selects synchronous (< acc_async_noval) or asynchronous cleanup.
   MAPNUM is the number of mappings originally created (it determines the
   refcount at which the descriptor is considered dead).  */

void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  /* NOTE(review): presumably single mappings hold 2 references and
     pointer (multi-) mappings hold 3 — confirm against the insert path.  */
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain.  This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory.  */
      t->tgt_end = 0;
      t->to_free = 0;

      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    t->device_descr->openacc.register_async_cleanup_func (t, async);

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}