1/* Implementation of commonly needed HSAIL related functions and methods. 2 Copyright (C) 2013-2020 Free Software Foundation, Inc. 3 Contributed by Martin Jambor <mjambor@suse.cz> and 4 Martin Liska <mliska@suse.cz>. 5 6This file is part of GCC. 7 8GCC is free software; you can redistribute it and/or modify 9it under the terms of the GNU General Public License as published by 10the Free Software Foundation; either version 3, or (at your option) 11any later version. 12 13GCC is distributed in the hope that it will be useful, 14but WITHOUT ANY WARRANTY; without even the implied warranty of 15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16GNU General Public License for more details. 17 18You should have received a copy of the GNU General Public License 19along with GCC; see the file COPYING3. If not see 20<http://www.gnu.org/licenses/>. */ 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "tm.h" 26#include "is-a.h" 27#include "hash-set.h" 28#include "hash-map.h" 29#include "vec.h" 30#include "tree.h" 31#include "dumpfile.h" 32#include "gimple-pretty-print.h" 33#include "diagnostic-core.h" 34#include "alloc-pool.h" 35#include "cgraph.h" 36#include "print-tree.h" 37#include "stringpool.h" 38#include "symbol-summary.h" 39#include "hsa-common.h" 40#include "internal-fn.h" 41#include "ctype.h" 42#include "builtins.h" 43#include "stringpool.h" 44#include "attribs.h" 45 46/* Structure containing intermediate HSA representation of the generated 47 function. */ 48class hsa_function_representation *hsa_cfun; 49 50/* Element of the mapping vector between a host decl and an HSA kernel. */ 51 52struct GTY(()) hsa_decl_kernel_map_element 53{ 54 /* The decl of the host function. */ 55 tree decl; 56 /* Name of the HSA kernel in BRIG. */ 57 char * GTY((skip)) name; 58 /* Size of OMP data, if the kernel contains a kernel dispatch. */ 59 unsigned omp_data_size; 60 /* True if the function is gridified kernel. */ 61 bool gridified_kernel_p; 62}; 63 64/* Mapping between decls and corresponding HSA kernels in this compilation 65 unit. */ 66 67static GTY (()) vec<hsa_decl_kernel_map_element, va_gc> 68 *hsa_decl_kernel_mapping; 69 70/* Mapping between decls and corresponding HSA kernels 71 called by the function. */ 72hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; 73 74/* Hash function to lookup a symbol for a decl. */ 75hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols; 76 77/* HSA summaries. */ 78hsa_summary_t *hsa_summaries = NULL; 79 80/* HSA number of threads. */ 81hsa_symbol *hsa_num_threads = NULL; 82 83/* HSA function that cannot be expanded to HSAIL. */ 84hash_set <tree> *hsa_failed_functions = NULL; 85 86/* True if compilation unit-wide data are already allocated and initialized. */ 87static bool compilation_unit_data_initialized; 88 89/* Return true if FNDECL represents an HSA-callable function. */ 90 91bool 92hsa_callable_function_p (tree fndecl) 93{ 94 return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl)) 95 && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl))); 96} 97 98/* Allocate HSA structures that are used when dealing with different 99 functions. */ 100 101void 102hsa_init_compilation_unit_data (void) 103{ 104 if (compilation_unit_data_initialized) 105 return; 106 107 compilation_unit_data_initialized = true; 108 109 hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8); 110 hsa_failed_functions = new hash_set <tree> (); 111 hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2); 112} 113 114/* Free data structures that are used when dealing with different 115 functions. */ 116 117void 118hsa_deinit_compilation_unit_data (void) 119{ 120 gcc_assert (compilation_unit_data_initialized); 121 122 delete hsa_failed_functions; 123 delete hsa_emitted_internal_decls; 124 125 for (hash_table <hsa_noop_symbol_hasher>::iterator it 126 = hsa_global_variable_symbols->begin (); 127 it != hsa_global_variable_symbols->end (); 128 ++it) 129 { 130 hsa_symbol *sym = *it; 131 delete sym; 132 } 133 134 delete hsa_global_variable_symbols; 135 136 if (hsa_num_threads) 137 { 138 delete hsa_num_threads; 139 hsa_num_threads = NULL; 140 } 141 142 compilation_unit_data_initialized = false; 143} 144 145/* Return true if we are generating large HSA machine model. */ 146 147bool 148hsa_machine_large_p (void) 149{ 150 /* FIXME: I suppose this is technically wrong but should work for me now. */ 151 return (GET_MODE_BITSIZE (Pmode) == 64); 152} 153 154/* Return the HSA profile we are using. */ 155 156bool 157hsa_full_profile_p (void) 158{ 159 return true; 160} 161 162/* Return true if a register in operand number OPNUM of instruction 163 is an output. False if it is an input. */ 164 165bool 166hsa_insn_basic::op_output_p (unsigned opnum) 167{ 168 switch (m_opcode) 169 { 170 case HSA_OPCODE_PHI: 171 case BRIG_OPCODE_CBR: 172 case BRIG_OPCODE_SBR: 173 case BRIG_OPCODE_ST: 174 case BRIG_OPCODE_SIGNALNORET: 175 case BRIG_OPCODE_DEBUGTRAP: 176 /* FIXME: There are probably missing cases here, double check. */ 177 return false; 178 case BRIG_OPCODE_EXPAND: 179 /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */ 180 return opnum < operand_count () - 1; 181 default: 182 return opnum == 0; 183 } 184} 185 186/* Return true if OPCODE is an floating-point bit instruction opcode. */ 187 188bool 189hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode) 190{ 191 switch (opcode) 192 { 193 case BRIG_OPCODE_NEG: 194 case BRIG_OPCODE_ABS: 195 case BRIG_OPCODE_CLASS: 196 case BRIG_OPCODE_COPYSIGN: 197 return true; 198 default: 199 return false; 200 } 201} 202 203/* Return the number of destination operands for this INSN. */ 204 205unsigned 206hsa_insn_basic::input_count () 207{ 208 switch (m_opcode) 209 { 210 default: 211 return 1; 212 213 case BRIG_OPCODE_NOP: 214 return 0; 215 216 case BRIG_OPCODE_EXPAND: 217 return 2; 218 219 case BRIG_OPCODE_LD: 220 /* ld_v[234] not yet handled. */ 221 return 1; 222 223 case BRIG_OPCODE_ST: 224 return 0; 225 226 case BRIG_OPCODE_ATOMICNORET: 227 return 0; 228 229 case BRIG_OPCODE_SIGNAL: 230 return 1; 231 232 case BRIG_OPCODE_SIGNALNORET: 233 return 0; 234 235 case BRIG_OPCODE_MEMFENCE: 236 return 0; 237 238 case BRIG_OPCODE_RDIMAGE: 239 case BRIG_OPCODE_LDIMAGE: 240 case BRIG_OPCODE_STIMAGE: 241 case BRIG_OPCODE_QUERYIMAGE: 242 case BRIG_OPCODE_QUERYSAMPLER: 243 sorry ("HSA image ops not handled"); 244 return 0; 245 246 case BRIG_OPCODE_CBR: 247 case BRIG_OPCODE_BR: 248 return 0; 249 250 case BRIG_OPCODE_SBR: 251 return 0; /* ??? */ 252 253 case BRIG_OPCODE_WAVEBARRIER: 254 return 0; /* ??? */ 255 256 case BRIG_OPCODE_BARRIER: 257 case BRIG_OPCODE_ARRIVEFBAR: 258 case BRIG_OPCODE_INITFBAR: 259 case BRIG_OPCODE_JOINFBAR: 260 case BRIG_OPCODE_LEAVEFBAR: 261 case BRIG_OPCODE_RELEASEFBAR: 262 case BRIG_OPCODE_WAITFBAR: 263 return 0; 264 265 case BRIG_OPCODE_LDF: 266 return 1; 267 268 case BRIG_OPCODE_ACTIVELANECOUNT: 269 case BRIG_OPCODE_ACTIVELANEID: 270 case BRIG_OPCODE_ACTIVELANEMASK: 271 case BRIG_OPCODE_ACTIVELANEPERMUTE: 272 return 1; /* ??? */ 273 274 case BRIG_OPCODE_CALL: 275 case BRIG_OPCODE_SCALL: 276 case BRIG_OPCODE_ICALL: 277 return 0; 278 279 case BRIG_OPCODE_RET: 280 return 0; 281 282 case BRIG_OPCODE_ALLOCA: 283 return 1; 284 285 case BRIG_OPCODE_CLEARDETECTEXCEPT: 286 return 0; 287 288 case BRIG_OPCODE_SETDETECTEXCEPT: 289 return 0; 290 291 case BRIG_OPCODE_PACKETCOMPLETIONSIG: 292 case BRIG_OPCODE_PACKETID: 293 case BRIG_OPCODE_CASQUEUEWRITEINDEX: 294 case BRIG_OPCODE_LDQUEUEREADINDEX: 295 case BRIG_OPCODE_LDQUEUEWRITEINDEX: 296 case BRIG_OPCODE_STQUEUEREADINDEX: 297 case BRIG_OPCODE_STQUEUEWRITEINDEX: 298 return 1; /* ??? */ 299 300 case BRIG_OPCODE_ADDQUEUEWRITEINDEX: 301 return 1; 302 303 case BRIG_OPCODE_DEBUGTRAP: 304 return 0; 305 306 case BRIG_OPCODE_GROUPBASEPTR: 307 case BRIG_OPCODE_KERNARGBASEPTR: 308 return 1; /* ??? */ 309 310 case HSA_OPCODE_ARG_BLOCK: 311 return 0; 312 313 case BRIG_KIND_DIRECTIVE_COMMENT: 314 return 0; 315 } 316} 317 318/* Return the number of source operands for this INSN. */ 319 320unsigned 321hsa_insn_basic::num_used_ops () 322{ 323 gcc_checking_assert (input_count () <= operand_count ()); 324 325 return operand_count () - input_count (); 326} 327 328/* Set alignment to VALUE. */ 329 330void 331hsa_insn_mem::set_align (BrigAlignment8_t value) 332{ 333 /* TODO: Perhaps remove this dump later on: */ 334 if (dump_file && (dump_flags & TDF_DETAILS) && value < m_align) 335 { 336 fprintf (dump_file, "Decreasing alignment to %u in instruction ", value); 337 dump_hsa_insn (dump_file, this); 338 } 339 m_align = value; 340} 341 342/* Return size of HSA type T in bits. */ 343 344unsigned 345hsa_type_bit_size (BrigType16_t t) 346{ 347 switch (t) 348 { 349 case BRIG_TYPE_B1: 350 return 1; 351 352 case BRIG_TYPE_U8: 353 case BRIG_TYPE_S8: 354 case BRIG_TYPE_B8: 355 return 8; 356 357 case BRIG_TYPE_U16: 358 case BRIG_TYPE_S16: 359 case BRIG_TYPE_B16: 360 case BRIG_TYPE_F16: 361 return 16; 362 363 case BRIG_TYPE_U32: 364 case BRIG_TYPE_S32: 365 case BRIG_TYPE_B32: 366 case BRIG_TYPE_F32: 367 case BRIG_TYPE_U8X4: 368 case BRIG_TYPE_U16X2: 369 case BRIG_TYPE_S8X4: 370 case BRIG_TYPE_S16X2: 371 case BRIG_TYPE_F16X2: 372 return 32; 373 374 case BRIG_TYPE_U64: 375 case BRIG_TYPE_S64: 376 case BRIG_TYPE_F64: 377 case BRIG_TYPE_B64: 378 case BRIG_TYPE_U8X8: 379 case BRIG_TYPE_U16X4: 380 case BRIG_TYPE_U32X2: 381 case BRIG_TYPE_S8X8: 382 case BRIG_TYPE_S16X4: 383 case BRIG_TYPE_S32X2: 384 case BRIG_TYPE_F16X4: 385 case BRIG_TYPE_F32X2: 386 387 return 64; 388 389 case BRIG_TYPE_B128: 390 case BRIG_TYPE_U8X16: 391 case BRIG_TYPE_U16X8: 392 case BRIG_TYPE_U32X4: 393 case BRIG_TYPE_U64X2: 394 case BRIG_TYPE_S8X16: 395 case BRIG_TYPE_S16X8: 396 case BRIG_TYPE_S32X4: 397 case BRIG_TYPE_S64X2: 398 case BRIG_TYPE_F16X8: 399 case BRIG_TYPE_F32X4: 400 case BRIG_TYPE_F64X2: 401 return 128; 402 403 default: 404 gcc_assert (hsa_seen_error ()); 405 return t; 406 } 407} 408 409/* Return BRIG bit-type with BITSIZE length. */ 410 411BrigType16_t 412hsa_bittype_for_bitsize (unsigned bitsize) 413{ 414 switch (bitsize) 415 { 416 case 1: 417 return BRIG_TYPE_B1; 418 case 8: 419 return BRIG_TYPE_B8; 420 case 16: 421 return BRIG_TYPE_B16; 422 case 32: 423 return BRIG_TYPE_B32; 424 case 64: 425 return BRIG_TYPE_B64; 426 case 128: 427 return BRIG_TYPE_B128; 428 default: 429 gcc_unreachable (); 430 } 431} 432 433/* Return BRIG unsigned int type with BITSIZE length. */ 434 435BrigType16_t 436hsa_uint_for_bitsize (unsigned bitsize) 437{ 438 switch (bitsize) 439 { 440 case 8: 441 return BRIG_TYPE_U8; 442 case 16: 443 return BRIG_TYPE_U16; 444 case 32: 445 return BRIG_TYPE_U32; 446 case 64: 447 return BRIG_TYPE_U64; 448 default: 449 gcc_unreachable (); 450 } 451} 452 453/* Return BRIG float type with BITSIZE length. */ 454 455BrigType16_t 456hsa_float_for_bitsize (unsigned bitsize) 457{ 458 switch (bitsize) 459 { 460 case 16: 461 return BRIG_TYPE_F16; 462 case 32: 463 return BRIG_TYPE_F32; 464 case 64: 465 return BRIG_TYPE_F64; 466 default: 467 gcc_unreachable (); 468 } 469} 470 471/* Return HSA bit-type with the same size as the type T. */ 472 473BrigType16_t 474hsa_bittype_for_type (BrigType16_t t) 475{ 476 return hsa_bittype_for_bitsize (hsa_type_bit_size (t)); 477} 478 479/* Return HSA unsigned integer type with the same size as the type T. */ 480 481BrigType16_t 482hsa_unsigned_type_for_type (BrigType16_t t) 483{ 484 return hsa_uint_for_bitsize (hsa_type_bit_size (t)); 485} 486 487/* Return true if TYPE is a packed HSA type. */ 488 489bool 490hsa_type_packed_p (BrigType16_t type) 491{ 492 return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE; 493} 494 495/* Return true if and only if TYPE is a floating point number type. */ 496 497bool 498hsa_type_float_p (BrigType16_t type) 499{ 500 switch (type & BRIG_TYPE_BASE_MASK) 501 { 502 case BRIG_TYPE_F16: 503 case BRIG_TYPE_F32: 504 case BRIG_TYPE_F64: 505 return true; 506 default: 507 return false; 508 } 509} 510 511/* Return true if and only if TYPE is an integer number type. */ 512 513bool 514hsa_type_integer_p (BrigType16_t type) 515{ 516 switch (type & BRIG_TYPE_BASE_MASK) 517 { 518 case BRIG_TYPE_U8: 519 case BRIG_TYPE_U16: 520 case BRIG_TYPE_U32: 521 case BRIG_TYPE_U64: 522 case BRIG_TYPE_S8: 523 case BRIG_TYPE_S16: 524 case BRIG_TYPE_S32: 525 case BRIG_TYPE_S64: 526 return true; 527 default: 528 return false; 529 } 530} 531 532/* Return true if and only if TYPE is an bit-type. */ 533 534bool 535hsa_btype_p (BrigType16_t type) 536{ 537 switch (type & BRIG_TYPE_BASE_MASK) 538 { 539 case BRIG_TYPE_B8: 540 case BRIG_TYPE_B16: 541 case BRIG_TYPE_B32: 542 case BRIG_TYPE_B64: 543 case BRIG_TYPE_B128: 544 return true; 545 default: 546 return false; 547 } 548} 549 550 551/* Return HSA alignment encoding alignment to N bits. */ 552 553BrigAlignment8_t 554hsa_alignment_encoding (unsigned n) 555{ 556 gcc_assert (n >= 8 && !(n & (n - 1))); 557 if (n >= 256) 558 return BRIG_ALIGNMENT_32; 559 560 switch (n) 561 { 562 case 8: 563 return BRIG_ALIGNMENT_1; 564 case 16: 565 return BRIG_ALIGNMENT_2; 566 case 32: 567 return BRIG_ALIGNMENT_4; 568 case 64: 569 return BRIG_ALIGNMENT_8; 570 case 128: 571 return BRIG_ALIGNMENT_16; 572 default: 573 gcc_unreachable (); 574 } 575} 576 577/* Return HSA alignment encoding alignment of T got 578 by get_object_alignment. */ 579 580BrigAlignment8_t 581hsa_object_alignment (tree t) 582{ 583 return hsa_alignment_encoding (get_object_alignment (t)); 584} 585 586/* Return byte alignment for given BrigAlignment8_t value. */ 587 588unsigned 589hsa_byte_alignment (BrigAlignment8_t alignment) 590{ 591 gcc_assert (alignment != BRIG_ALIGNMENT_NONE); 592 593 return 1 << (alignment - 1); 594} 595 596/* Return natural alignment of HSA TYPE. */ 597 598BrigAlignment8_t 599hsa_natural_alignment (BrigType16_t type) 600{ 601 return hsa_alignment_encoding (hsa_type_bit_size (type & ~BRIG_TYPE_ARRAY)); 602} 603 604/* Call the correct destructor of a HSA instruction. */ 605 606void 607hsa_destroy_insn (hsa_insn_basic *insn) 608{ 609 if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn)) 610 phi->~hsa_insn_phi (); 611 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn)) 612 br->~hsa_insn_cbr (); 613 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn)) 614 cmp->~hsa_insn_cmp (); 615 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn)) 616 mem->~hsa_insn_mem (); 617 else if (hsa_insn_atomic *atomic = dyn_cast <hsa_insn_atomic *> (insn)) 618 atomic->~hsa_insn_atomic (); 619 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn)) 620 seg->~hsa_insn_seg (); 621 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn)) 622 call->~hsa_insn_call (); 623 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn)) 624 block->~hsa_insn_arg_block (); 625 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn)) 626 sbr->~hsa_insn_sbr (); 627 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn)) 628 br->~hsa_insn_br (); 629 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn)) 630 comment->~hsa_insn_comment (); 631 else 632 insn->~hsa_insn_basic (); 633} 634 635/* Call the correct destructor of a HSA operand. */ 636 637void 638hsa_destroy_operand (hsa_op_base *op) 639{ 640 if (hsa_op_code_list *list = dyn_cast <hsa_op_code_list *> (op)) 641 list->~hsa_op_code_list (); 642 else if (hsa_op_operand_list *list = dyn_cast <hsa_op_operand_list *> (op)) 643 list->~hsa_op_operand_list (); 644 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) 645 reg->~hsa_op_reg (); 646 else if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (op)) 647 immed->~hsa_op_immed (); 648 else 649 op->~hsa_op_base (); 650} 651 652/* Create a mapping between the original function DECL and kernel name NAME. */ 653 654void 655hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size, 656 bool gridified_kernel_p) 657{ 658 hsa_decl_kernel_map_element dkm; 659 dkm.decl = decl; 660 dkm.name = name; 661 dkm.omp_data_size = omp_data_size; 662 dkm.gridified_kernel_p = gridified_kernel_p; 663 vec_safe_push (hsa_decl_kernel_mapping, dkm); 664} 665 666/* Return the number of kernel decl name mappings. */ 667 668unsigned 669hsa_get_number_decl_kernel_mappings (void) 670{ 671 return vec_safe_length (hsa_decl_kernel_mapping); 672} 673 674/* Return the decl in the Ith kernel decl name mapping. */ 675 676tree 677hsa_get_decl_kernel_mapping_decl (unsigned i) 678{ 679 return (*hsa_decl_kernel_mapping)[i].decl; 680} 681 682/* Return the name in the Ith kernel decl name mapping. */ 683 684char * 685hsa_get_decl_kernel_mapping_name (unsigned i) 686{ 687 return (*hsa_decl_kernel_mapping)[i].name; 688} 689 690/* Return maximum OMP size for kernel decl name mapping. */ 691 692unsigned 693hsa_get_decl_kernel_mapping_omp_size (unsigned i) 694{ 695 return (*hsa_decl_kernel_mapping)[i].omp_data_size; 696} 697 698/* Return if the function is gridified kernel in decl name mapping. */ 699 700bool 701hsa_get_decl_kernel_mapping_gridified (unsigned i) 702{ 703 return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p; 704} 705 706/* Free the mapping between original decls and kernel names. */ 707 708void 709hsa_free_decl_kernel_mapping (void) 710{ 711 if (hsa_decl_kernel_mapping == NULL) 712 return; 713 714 for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i) 715 free ((*hsa_decl_kernel_mapping)[i].name); 716 ggc_free (hsa_decl_kernel_mapping); 717} 718 719/* Add new kernel dependency. */ 720 721void 722hsa_add_kernel_dependency (tree caller, const char *called_function) 723{ 724 if (hsa_decl_kernel_dependencies == NULL) 725 hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> (); 726 727 vec <const char *> *s = NULL; 728 vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller); 729 if (slot == NULL) 730 { 731 s = new vec <const char *> (); 732 hsa_decl_kernel_dependencies->put (caller, s); 733 } 734 else 735 s = *slot; 736 737 s->safe_push (called_function); 738} 739 740/* Expansion to HSA needs a few gc roots to hold types, constructors etc. In 741 order to minimize the number of GTY roots, we'll root them all in the 742 following array. The individual elements should only be accessed by the 743 very simple getters (of a pointer-to-tree) below. */ 744 745static GTY(()) tree hsa_tree_gt_roots[3]; 746 747tree * 748hsa_get_ctor_statements (void) 749{ 750 return &hsa_tree_gt_roots[0]; 751} 752 753tree * 754hsa_get_dtor_statements (void) 755{ 756 return &hsa_tree_gt_roots[1]; 757} 758 759tree * 760hsa_get_kernel_dispatch_type (void) 761{ 762 return &hsa_tree_gt_roots[2]; 763} 764 765/* Modify the name P in-place so that it is a valid HSA identifier. */ 766 767void 768hsa_sanitize_name (char *p) 769{ 770 for (; *p; p++) 771 if (*p == '.' || *p == '-') 772 *p = '_'; 773} 774 775/* Clone the name P, set trailing ampersand and sanitize the name. */ 776 777char * 778hsa_brig_function_name (const char *p) 779{ 780 unsigned len = strlen (p); 781 char *buf = XNEWVEC (char, len + 2); 782 783 buf[0] = '&'; 784 buf[len + 1] = '\0'; 785 memcpy (buf + 1, p, len); 786 787 hsa_sanitize_name (buf); 788 return buf; 789} 790 791/* Add a flatten attribute and disable vectorization for gpu implementation 792 function decl GDECL. */ 793 794void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl) 795{ 796 DECL_ATTRIBUTES (gdecl) 797 = tree_cons (get_identifier ("flatten"), NULL_TREE, 798 DECL_ATTRIBUTES (gdecl)); 799 800 tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl); 801 if (fn_opts == NULL_TREE) 802 fn_opts = optimization_default_node; 803 fn_opts = copy_node (fn_opts); 804 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false; 805 TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false; 806 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts; 807} 808 809void 810hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, 811 hsa_function_kind kind, bool gridified_kernel_p) 812{ 813 hsa_function_summary *gpu_summary = get_create (gpu); 814 hsa_function_summary *host_summary = get_create (host); 815 816 gpu_summary->m_kind = kind; 817 host_summary->m_kind = kind; 818 819 gpu_summary->m_gpu_implementation_p = true; 820 host_summary->m_gpu_implementation_p = false; 821 822 gpu_summary->m_gridified_kernel_p = gridified_kernel_p; 823 host_summary->m_gridified_kernel_p = gridified_kernel_p; 824 825 gpu_summary->m_bound_function = host; 826 host_summary->m_bound_function = gpu; 827 828 process_gpu_implementation_attributes (gpu->decl); 829 830 /* Create reference between a kernel and a corresponding host implementation 831 to quarantee LTO streaming to a same LTRANS. */ 832 if (kind == HSA_KERNEL) 833 gpu->create_reference (host, IPA_REF_ADDR); 834} 835 836/* Add a HOST function to HSA summaries. */ 837 838void 839hsa_register_kernel (cgraph_node *host) 840{ 841 if (hsa_summaries == NULL) 842 hsa_summaries = new hsa_summary_t (symtab); 843 hsa_function_summary *s = hsa_summaries->get_create (host); 844 s->m_kind = HSA_KERNEL; 845} 846 847/* Add a pair of functions to HSA summaries. GPU is an HSA implementation of 848 a HOST function. */ 849 850void 851hsa_register_kernel (cgraph_node *gpu, cgraph_node *host) 852{ 853 if (hsa_summaries == NULL) 854 hsa_summaries = new hsa_summary_t (symtab); 855 hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true); 856} 857 858/* Return true if expansion of the current HSA function has already failed. */ 859 860bool 861hsa_seen_error (void) 862{ 863 return hsa_cfun->m_seen_error; 864} 865 866/* Mark current HSA function as failed. */ 867 868void 869hsa_fail_cfun (void) 870{ 871 hsa_failed_functions->add (hsa_cfun->m_decl); 872 hsa_cfun->m_seen_error = true; 873} 874 875char * 876hsa_internal_fn::name () 877{ 878 char *name = xstrdup (internal_fn_name (m_fn)); 879 for (char *ptr = name; *ptr; ptr++) 880 *ptr = TOLOWER (*ptr); 881 882 const char *suffix = NULL; 883 if (m_type_bit_size == 32) 884 suffix = "f"; 885 886 if (suffix) 887 { 888 char *name2 = concat (name, suffix, NULL); 889 free (name); 890 name = name2; 891 } 892 893 hsa_sanitize_name (name); 894 return name; 895} 896 897unsigned 898hsa_internal_fn::get_arity () 899{ 900 switch (m_fn) 901 { 902 case IFN_ACOS: 903 case IFN_ASIN: 904 case IFN_ATAN: 905 case IFN_COS: 906 case IFN_EXP: 907 case IFN_EXP10: 908 case IFN_EXP2: 909 case IFN_EXPM1: 910 case IFN_LOG: 911 case IFN_LOG10: 912 case IFN_LOG1P: 913 case IFN_LOG2: 914 case IFN_LOGB: 915 case IFN_SIGNIFICAND: 916 case IFN_SIN: 917 case IFN_SQRT: 918 case IFN_TAN: 919 case IFN_CEIL: 920 case IFN_FLOOR: 921 case IFN_NEARBYINT: 922 case IFN_RINT: 923 case IFN_ROUND: 924 case IFN_TRUNC: 925 return 1; 926 case IFN_ATAN2: 927 case IFN_COPYSIGN: 928 case IFN_FMOD: 929 case IFN_POW: 930 case IFN_REMAINDER: 931 case IFN_SCALB: 932 case IFN_LDEXP: 933 return 2; 934 case IFN_CLRSB: 935 case IFN_CLZ: 936 case IFN_CTZ: 937 case IFN_FFS: 938 case IFN_PARITY: 939 case IFN_POPCOUNT: 940 default: 941 /* As we produce sorry message for unknown internal functions, 942 reaching this label is definitely a bug. */ 943 gcc_unreachable (); 944 } 945} 946 947BrigType16_t 948hsa_internal_fn::get_argument_type (int n) 949{ 950 switch (m_fn) 951 { 952 case IFN_ACOS: 953 case IFN_ASIN: 954 case IFN_ATAN: 955 case IFN_COS: 956 case IFN_EXP: 957 case IFN_EXP10: 958 case IFN_EXP2: 959 case IFN_EXPM1: 960 case IFN_LOG: 961 case IFN_LOG10: 962 case IFN_LOG1P: 963 case IFN_LOG2: 964 case IFN_LOGB: 965 case IFN_SIGNIFICAND: 966 case IFN_SIN: 967 case IFN_SQRT: 968 case IFN_TAN: 969 case IFN_CEIL: 970 case IFN_FLOOR: 971 case IFN_NEARBYINT: 972 case IFN_RINT: 973 case IFN_ROUND: 974 case IFN_TRUNC: 975 case IFN_ATAN2: 976 case IFN_COPYSIGN: 977 case IFN_FMOD: 978 case IFN_POW: 979 case IFN_REMAINDER: 980 case IFN_SCALB: 981 return hsa_float_for_bitsize (m_type_bit_size); 982 case IFN_LDEXP: 983 { 984 if (n == -1 || n == 0) 985 return hsa_float_for_bitsize (m_type_bit_size); 986 else 987 return BRIG_TYPE_S32; 988 } 989 default: 990 /* As we produce sorry message for unknown internal functions, 991 reaching this label is definitely a bug. */ 992 gcc_unreachable (); 993 } 994} 995 996#include "gt-hsa-common.h" 997