// NeonEmitter.cpp revision 263508
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface. See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point. A complete set of tests
// for Neon intrinsics can be generated by calling the runTests() entry point.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <string>
using namespace llvm;

/// OpKind - One enumerator per generic operation pattern ("OP_*" record name
/// in arm_neon.td).  Intrinsics tagged with one of these are emitted as plain
/// C vector expressions rather than as a __builtin_ call.
enum OpKind {
  OpNone,
  OpUnavailable,
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMullHiN,
  OpMlalHi,
  OpMlalHiN,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMlslHiN,
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpFMlaN,
  OpFMlsN,
  OpMlalN,
  OpMlslN,
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  OpQDMullHi,
  OpQDMullHiN,
  OpQDMlalHi,
  OpQDMlalHiN,
  OpQDMlslHi,
  OpQDMlslHiN,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ,
  OpScalarMulLane,
  OpScalarMulLaneQ,
  OpScalarMulXLane,
  OpScalarMulXLaneQ,
  OpScalarVMulXLane,
  OpScalarVMulXLaneQ,
  OpScalarQDMullLane,
  OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane,
  OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane,
  OpScalarQRDMulHiLaneQ,
  OpScalarGetLane,
  OpScalarSetLane
};

/// ClassKind - Controls which type suffix an intrinsic name gets (see
/// MangleName / InstructionTypeCode below).
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};

/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  enum {
    EltTypeMask = 0xf,   // low nibble holds an EltType value
    UnsignedFlag = 0x10,
    QuadFlag = 0x20      // 128-bit (quad) vector
  };
  uint32_t Flags;

public:
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Float16,
    Float32,
    Float64
  };

  NeonTypeFlags(unsigned F) : Flags(F) {}
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace

namespace {
/// NeonEmitter - The TableGen backend.  The constructor builds two lookup
/// tables: OpMap, mapping "OP_*" record names from arm_neon.td to OpKind
/// enumerators, and ClassMap, mapping instruction record classes to their
/// ClassKind (which drives name suffix generation).
class NeonEmitter {
  RecordKeeper &Records;
  StringMap<OpKind> OpMap;
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    OpMap["OP_NONE"] = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"] = OpAdd;
    OpMap["OP_ADDL"] = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"] = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"] = OpSub;
    OpMap["OP_SUBL"] = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"] = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"] = OpMul;
    OpMap["OP_MLA"] = OpMla;
    OpMap["OP_MLAL"] = OpMlal;
    OpMap["OP_MULLHi"] = OpMullHi;
    OpMap["OP_MULLHi_N"] = OpMullHiN;
    OpMap["OP_MLALHi"] = OpMlalHi;
    OpMap["OP_MLALHi_N"] = OpMlalHiN;
    OpMap["OP_MLS"] = OpMls;
    OpMap["OP_MLSL"] = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MLSLHi_N"] = OpMlslHiN;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_FMLA_N"] = OpFMlaN;
    OpMap["OP_FMLS_N"] = OpFMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"] = OpMulLane;
    OpMap["OP_MULX_LN"] = OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"] = OpMlaLane;
    OpMap["OP_MLS_LN"] = OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_TRN1"] = OpTrn1;
    OpMap["OP_ZIP1"] = OpZip1;
    OpMap["OP_UZP1"] = OpUzp1;
    OpMap["OP_TRN2"] = OpTrn2;
    OpMap["OP_ZIP2"] = OpZip2;
    OpMap["OP_UZP2"] = OpUzp2;
    OpMap["OP_EQ"] = OpEq;
    OpMap["OP_GE"] = OpGe;
    OpMap["OP_LE"] = OpLe;
    OpMap["OP_GT"] = OpGt;
    OpMap["OP_LT"] = OpLt;
    OpMap["OP_NEG"] = OpNeg;
    OpMap["OP_NOT"] = OpNot;
    OpMap["OP_AND"] = OpAnd;
    OpMap["OP_OR"] = OpOr;
    OpMap["OP_XOR"] = OpXor;
    OpMap["OP_ANDN"] = OpAndNot;
    OpMap["OP_ORN"] = OpOrNot;
    OpMap["OP_CAST"] = OpCast;
    OpMap["OP_CONC"] = OpConcat;
    OpMap["OP_HI"] = OpHi;
    OpMap["OP_LO"] = OpLo;
    OpMap["OP_DUP"] = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"] = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_XTN"] = OpXtnHi;
    OpMap["OP_SQXTUN"] = OpSqxtunHi;
    OpMap["OP_QXTN"] = OpQxtnHi;
    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"] = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"] = OpAba;
    OpMap["OP_ABAL"] = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY_LN"] = OpCopyLane;
    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
    OpMap["OP_SCALAR_MUL_LN"] = OpScalarMulLane;
    OpMap["OP_SCALAR_MUL_LNQ"] = OpScalarMulLaneQ;
    OpMap["OP_SCALAR_MULX_LN"] = OpScalarMulXLane;
    OpMap["OP_SCALAR_MULX_LNQ"] = OpScalarMulXLaneQ;
    OpMap["OP_SCALAR_VMULX_LN"] = OpScalarVMulXLane;
    OpMap["OP_SCALAR_VMULX_LNQ"] = OpScalarVMulXLaneQ;
    OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
    OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
    OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
    OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
    OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
    OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
    OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
    OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;

    // Map each TableGen instruction class onto the ClassKind that controls
    // its name-suffix style.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace

/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
/// with each StringRef representing a single type declared in the string.
389/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing 390/// 2xfloat and 4xfloat respectively. 391static void ParseTypes(Record *r, std::string &s, 392 SmallVectorImpl<StringRef> &TV) { 393 const char *data = s.data(); 394 int len = 0; 395 396 for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) { 397 if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U' 398 || data[len] == 'H' || data[len] == 'S') 399 continue; 400 401 switch (data[len]) { 402 case 'c': 403 case 's': 404 case 'i': 405 case 'l': 406 case 'h': 407 case 'f': 408 case 'd': 409 break; 410 default: 411 PrintFatalError(r->getLoc(), 412 "Unexpected letter: " + std::string(data + len, 1)); 413 } 414 TV.push_back(StringRef(data, len + 1)); 415 data += len + 1; 416 len = -1; 417 } 418} 419 420/// Widen - Convert a type code into the next wider type. char -> short, 421/// short -> int, etc. 422static char Widen(const char t) { 423 switch (t) { 424 case 'c': 425 return 's'; 426 case 's': 427 return 'i'; 428 case 'i': 429 return 'l'; 430 case 'h': 431 return 'f'; 432 case 'f': 433 return 'd'; 434 default: 435 PrintFatalError("unhandled type in widen!"); 436 } 437} 438 439/// Narrow - Convert a type code into the next smaller type. short -> char, 440/// float -> half float, etc. 
441static char Narrow(const char t) { 442 switch (t) { 443 case 's': 444 return 'c'; 445 case 'i': 446 return 's'; 447 case 'l': 448 return 'i'; 449 case 'f': 450 return 'h'; 451 case 'd': 452 return 'f'; 453 default: 454 PrintFatalError("unhandled type in narrow!"); 455 } 456} 457 458static std::string GetNarrowTypestr(StringRef ty) 459{ 460 std::string s; 461 for (size_t i = 0, end = ty.size(); i < end; i++) { 462 switch (ty[i]) { 463 case 's': 464 s += 'c'; 465 break; 466 case 'i': 467 s += 's'; 468 break; 469 case 'l': 470 s += 'i'; 471 break; 472 default: 473 s += ty[i]; 474 break; 475 } 476 } 477 478 return s; 479} 480 481/// For a particular StringRef, return the base type code, and whether it has 482/// the quad-vector, polynomial, or unsigned modifiers set. 483static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) { 484 unsigned off = 0; 485 // ignore scalar. 486 if (ty[off] == 'S') { 487 ++off; 488 } 489 // remember quad. 490 if (ty[off] == 'Q' || ty[off] == 'H') { 491 quad = true; 492 ++off; 493 } 494 495 // remember poly. 496 if (ty[off] == 'P') { 497 poly = true; 498 ++off; 499 } 500 501 // remember unsigned. 502 if (ty[off] == 'U') { 503 usgn = true; 504 ++off; 505 } 506 507 // base type to get the type string for. 508 return ty[off]; 509} 510 511/// ModType - Transform a type code and its modifiers based on a mod code. The 512/// mod code definitions may be found at the top of arm_neon.td. 
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
  case 't':
    // Poly types become their unsigned integer equivalent.
    if (poly) {
      poly = false;
      usgn = true;
    }
    break;
  case 'b':
    scal = true;
    // FALLTHROUGH: 'b' is the scalar form of 'u'.
  case 'u':
    usgn = true;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case '$':
    scal = true;
    // FALLTHROUGH: '$' is the scalar form of 'x'.
  case 'x':
    usgn = false;
    poly = false;
    if (type == 'f')
      type = 'i';
    if (type == 'd')
      type = 'l';
    break;
  case 'o':
    scal = true;
    type = 'd';
    usgn = false;
    break;
  case 'y':
    scal = true;
    // FALLTHROUGH: 'y' is the scalar form of 'f'.
  case 'f':
    if (type == 'h')
      quad = true;
    type = 'f';
    usgn = false;
    break;
  case 'F':
    type = 'd';
    usgn = false;
    break;
  case 'g':
    // Force a 64-bit (non-quad) vector.
    quad = false;
    break;
  case 'B':
  case 'C':
  case 'D':
  case 'j':
    // Force a 128-bit (quad) vector.
    quad = true;
    break;
  case 'w':
    type = Widen(type);
    quad = true;
    break;
  case 'n':
    type = Widen(type);
    break;
  case 'i':
    type = 'i';
    scal = true;
    break;
  case 'l':
    type = 'l';
    scal = true;
    usgn = true;
    break;
  case 'z':
    type = Narrow(type);
    scal = true;
    break;
  case 'r':
    type = Widen(type);
    scal = true;
    break;
  case 's':
  case 'a':
    scal = true;
    break;
  case 'k':
    quad = true;
    break;
  case 'c':
    cnst = true;
    // FALLTHROUGH: 'c' is the const form of 'p'.
  case 'p':
    pntr = true;
    scal = true;
    break;
  case 'h':
    type = Narrow(type);
    // Narrowing below half-float is expressed as dropping to a 64-bit vector.
    if (type == 'h')
      quad = false;
    break;
  case 'q':
    type = Narrow(type);
    quad = true;
    break;
  case 'e':
    type = Narrow(type);
    usgn = true;
    break;
  case 'm':
    type = Narrow(type);
    quad = false;
    break;
  default:
    break;
  }
  return type;
}

/// IsMultiVecProto - Return true if the prototype modifier denotes a
/// 2-, 3- or 4-vector aggregate ('2'..'4' and their quad forms 'B'..'D').
static bool IsMultiVecProto(const char p) {
  return ((p >= '2' && p <= '4') || (p >= 'B' && p <= 'D'));
}

/// TypeString - for a modifier and type, generate the name of the typedef for
/// that type. QUc -> uint8x8_t.
static std::string TypeString(const char mod, StringRef typestr) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "void";
  if (mod == 'i')
    return "int";

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  SmallString<128> s;

  if (usgn)
    s.push_back('u');

  switch (type) {
  case 'c':
    s += poly ? "poly8" : "int8";
    if (scal)
      break;
    s += quad ? "x16" : "x8";
    break;
  case 's':
    s += poly ? "poly16" : "int16";
    if (scal)
      break;
    s += quad ? "x8" : "x4";
    break;
  case 'i':
    s += "int32";
    if (scal)
      break;
    s += quad ? "x4" : "x2";
    break;
  case 'l':
    s += (poly && !usgn)? "poly64" : "int64";
    if (scal)
      break;
    s += quad ? "x2" : "x1";
    break;
  case 'h':
    s += "float16";
    if (scal)
      break;
    s += quad ? "x8" : "x4";
    break;
  case 'f':
    s += "float32";
    if (scal)
      break;
    s += quad ? "x4" : "x2";
    break;
  case 'd':
    s += "float64";
    if (scal)
      break;
    s += quad ? "x2" : "x1";
    break;

  default:
    PrintFatalError("unhandled type!");
  }

  // Multi-vector aggregates get an additional element-count suffix.
  if (mod == '2' || mod == 'B')
    s += "x2";
  if (mod == '3' || mod == 'C')
    s += "x3";
  if (mod == '4' || mod == 'D')
    s += "x4";

  // Append _t, finishing the type string typedef type.
  s += "_t";

  if (cnst)
    s += " const";

  if (pntr)
    s += " *";

  return s.str();
}

/// BuiltinTypeString - for a modifier and type, generate the clang
/// BuiltinsARM.def prototype code for the function. See the top of clang's
/// Builtins.def for a description of the type strings.
static std::string BuiltinTypeString(const char mod, StringRef typestr,
                                     ClassKind ck, bool ret) {
  bool quad = false;
  bool poly = false;
  bool usgn = false;
  bool scal = false;
  bool cnst = false;
  bool pntr = false;

  if (mod == 'v')
    return "v"; // void
  if (mod == 'i')
    return "i"; // int

  // base type to get the type string for.
  char type = ClassifyType(typestr, quad, poly, usgn);

  // Based on the modifying character, change the type and width if necessary.
  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);

  // All pointers are void* pointers. Change type to 'v' now.
  if (pntr) {
    usgn = false;
    poly = false;
    type = 'v';
  }
  // Treat half-float ('h') types as unsigned short ('s') types.
  if (type == 'h') {
    type = 's';
    usgn = true;
  }
  // Class I/W scalar integer builtins are always spelled unsigned.
  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
                        scal && type != 'f' && type != 'd');

  if (scal) {
    SmallString<128> s;

    if (usgn)
      s.push_back('U');
    else if (type == 'c')
      s.push_back('S'); // make chars explicitly signed

    if (type == 'l') // 64-bit long
      s += "LLi";
    else
      s.push_back(type);

    if (cnst)
      s.push_back('C');
    if (pntr)
      s.push_back('*');
    return s.str();
  }

  // Since the return value must be one type, return a vector type of the
  // appropriate width which we will bitcast. An exception is made for
  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  // fashion, storing them to a pointer arg.
  if (ret) {
    if (IsMultiVecProto(mod))
      return "vv*"; // void result with void* first argument
    if (mod == 'f' || (ck != ClassB && type == 'f'))
      return quad ? "V4f" : "V2f";
    if (mod == 'F' || (ck != ClassB && type == 'd'))
      return quad ? "V2d" : "V1d";
    if (ck != ClassB && type == 's')
      return quad ? "V8s" : "V4s";
    if (ck != ClassB && type == 'i')
      return quad ? "V4i" : "V2i";
    if (ck != ClassB && type == 'l')
      return quad ? "V2LLi" : "V1LLi";

    // ClassB (and any fallthrough) is expressed as a vector of signed chars.
    return quad ? "V16Sc" : "V8Sc";
  }

  // Non-return array types are passed as individual vectors.
  if (mod == '2' || mod == 'B')
    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
  if (mod == '3' || mod == 'C')
    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
  if (mod == '4' || mod == 'D')
    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";

  if (mod == 'f' || (ck != ClassB && type == 'f'))
    return quad ? "V4f" : "V2f";
  if (mod == 'F' || (ck != ClassB && type == 'd'))
    return quad ? "V2d" : "V1d";
  if (ck != ClassB && type == 's')
    return quad ? "V8s" : "V4s";
  if (ck != ClassB && type == 'i')
    return quad ? "V4i" : "V2i";
  if (ck != ClassB && type == 'l')
    return quad ? "V2LLi" : "V1LLi";

  return quad ? "V16Sc" : "V8Sc";
}

/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // There is no width-only form of a double instruction.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}

/// Insert_BHSD_Suffix - For a scalar type string (leading 'S'), return the
/// AArch64 scalar register suffix letter ('b', 'h', 's' or 'd') implied by
/// the base type, or 0 if the type string is not scalar.
static char Insert_BHSD_Suffix(StringRef typestr){
  unsigned off = 0;
  if(typestr[off++] == 'S'){
    // Skip any modifier letters between 'S' and the base type.
    while(typestr[off] == 'Q' || typestr[off] == 'H'||
          typestr[off] == 'P' || typestr[off] == 'U')
      ++off;
    switch (typestr[off]){
    default  : break;
    case 'c' : return 'b';
    case 's' : return 'h';
    case 'i' :
    case 'f' : return 's';
    case 'l' :
    case 'd' : return 'd';
    }
  }
  return 0;
}

/// endsWith_xN - Return true if \p name ends with "_x2", "_x3" or "_x4".
static bool endsWith_xN(std::string const &name) {
  if (name.length() > 3) {
    if (name.compare(name.length() - 3, 3, "_x2") == 0 ||
        name.compare(name.length() - 3, 3, "_x3") == 0 ||
        name.compare(name.length() - 3, 3, "_x4") == 0)
      return true;
  }
  return false;
}

/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  // These three conversions carry both types in their names already and are
  // never suffixed.
  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
      name == "vcvt_f64_f32")
    return name;

  bool quad = false;
  std::string typeCode = "";

  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (typeCode.size() > 0) {
    // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
    if (endsWith_xN(s))
      s.insert(s.length() - 3, "_" + typeCode);
    else
      s += "_" + typeCode;
  }

  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  if (typestr.find("Q") != StringRef::npos) {
    size_t pos = s.find('_');
    s = s.insert(pos, "q");
  }
  // Likewise insert the scalar 'b'/'h'/'s'/'d' suffix letter, if any, before
  // the first '_'.
  char ins = Insert_BHSD_Suffix(typestr);
  if(ins){
    size_t pos = s.find('_');
    s = s.insert(pos, &ins, 1);
  }

  return s;
}

/// PreprocessInstruction - Split a record name into the instruction Prefix
/// and a set of postfix flags (_n, _lane, _dup, special vcvt) that later
/// drive FileCheck pattern generation.  For vtblN/vtbxN, TBNumber receives N.
static void PreprocessInstruction(const StringRef &Name,
                                  const std::string &InstName,
                                  std::string &Prefix,
                                  bool &HasNPostfix,
                                  bool &HasLanePostfix,
                                  bool &HasDupPostfix,
                                  bool &IsSpecialVCvt,
                                  size_t &TBNumber) {
  // All of our instruction name fields from arm_neon.td are of the form
  //   <instructionname>_...
  // Thus we grab our instruction name via computation of said Prefix.
  const size_t PrefixEnd = Name.find_first_of('_');
  // If InstName is passed in, we use that instead of our name Prefix.
  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;

  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());

  HasNPostfix = Postfix.count("_n");
  HasLanePostfix = Postfix.count("_lane");
  HasDupPostfix = Postfix.count("_dup");
  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");

  if (InstName.compare("vtbl") == 0 ||
      InstName.compare("vtbx") == 0) {
    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
    // encoding to get its true value (48 == '0').
    TBNumber = Name[Name.size()-1] - 48;
  }
}

/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
/// extracted, generate a FileCheck pattern for a Load Or Store
static void
GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
                                          const std::string& OutTypeCode,
                                          const bool &IsQuad,
                                          const bool &HasDupPostfix,
                                          const bool &HasLanePostfix,
                                          const size_t Count,
                                          std::string &RegisterSuffix) {
  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
  // will output a series of v{ld,st}1s, so we have to handle it specially.
  if ((Count == 3 || Count == 4) && IsQuad) {
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}";
  } else {

    // Handle normal loads and stores.
    RegisterSuffix += "{";
    for (size_t i = 0; i < Count; i++) {
      RegisterSuffix += "d{{[0-9]+}}";
      if (HasDupPostfix) {
        RegisterSuffix += "[]";
      }
      if (HasLanePostfix) {
        RegisterSuffix += "[{{[0-9]+}}]";
      }
      // Quad registers are matched as a pair of d registers, except for
      // lane operations which only touch one d register.
      if (IsQuad && !HasLanePostfix) {
        RegisterSuffix += ", d{{[0-9]+}}";
        if (HasDupPostfix) {
          RegisterSuffix += "[]";
        }
      }
      if (i < Count-1) {
        RegisterSuffix += ", ";
      }
    }
    RegisterSuffix += "}, [r{{[0-9]+}}";

    // We only include the alignment hint if we have a vld1.*64 or
    // a dup/lane instruction.
    if (IsLDSTOne) {
      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
        RegisterSuffix += ":" + OutTypeCode;
      }
    }

    RegisterSuffix += "]";
  }
}

/// HasNPostfixAndScalarArgs - Return true for the multiply-class intrinsics
/// whose _n form takes a scalar argument.
static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
                                     const bool &HasNPostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          NameRef.count("vmull") ||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqdmull") ||
          NameRef.count("vqrdmulh")) && HasNPostfix;
}

/// IsFiveOperandLaneAccumulator - Return true for _lane accumulator
/// operations whose assembly takes five operands.
static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
                                         const bool &HasLanePostfix) {
  return (NameRef.count("vmla") ||
          NameRef.count("vmls") ||
          NameRef.count("vmlal") ||
          NameRef.count("vmlsl") ||
          // "vmul" exactly (size check excludes vmull etc.).
          (NameRef.count("vmul") && NameRef.size() == 3)||
          NameRef.count("vqdmlal") ||
          NameRef.count("vqdmlsl") ||
          NameRef.count("vqdmulh") ||
          NameRef.count("vqrdmulh")) && HasLanePostfix;
}

/// IsSpecialLaneMultiply - Return true for _lane multiplies that need the
/// special operand-dropping treatment in
/// NormalizeProtoForRegisterPatternCreation.
static bool IsSpecialLaneMultiply(const StringRef &NameRef,
                                  const bool &HasLanePostfix,
                                  const bool &IsQuad) {
  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
                            && IsQuad;
  const bool IsVMull = NameRef.count("mull") && !IsQuad;
  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
}

/// NormalizeProtoForRegisterPatternCreation - Map a prototype string onto the
/// reduced alphabet {q, d, i, a} (quad reg, double reg, immediate, lane)
/// used by GenerateRegisterCheckPattern, then fix up several instruction
/// families whose assembly drops or reorders operands.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'F':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      NormedProto += 'q';
      break;
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      NormedProto += 'd';
      break;
    case 'i':
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // If we have a special lane multiply, overwrite position 2 with
    // position 3 and drop the tail.  (Original comment was truncated to
    // "// If".)
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, ignore the first character.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}

/// GenerateRegisterCheckPatterns - Given a bunch of data we have
/// extracted, generate a FileCheck pattern to check that an
/// instruction's arguments are correct.
static void GenerateRegisterCheckPattern(const std::string &Name,
                                         const std::string &Proto,
                                         const std::string &OutTypeCode,
                                         const bool &HasNPostfix,
                                         const bool &IsQuad,
                                         const bool &HasLanePostfix,
                                         const bool &HasDupPostfix,
                                         const size_t &TBNumber,
                                         std::string &RegisterSuffix) {

  RegisterSuffix = "";

  const StringRef NameRef(Name);
  const StringRef ProtoRef(Proto);

  // vdup/vmov _n forms have no register pattern we can usefully check.
  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
    return;
  }

  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");

  if (IsLoadStore) {
    // Grab N value from v{ld,st}N using its ascii representation (48 == '0').
    const size_t Count = NameRef[3] - 48;

    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
                                              HasDupPostfix, HasLanePostfix,
                                              Count, RegisterSuffix);
  } else if (IsTBXOrTBL) {
    RegisterSuffix += "d{{[0-9]+}}, {";
    for (size_t i = 0; i < TBNumber-1; i++) {
      RegisterSuffix += "d{{[0-9]+}}, ";
    }
    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
  } else {
    // Handle a normal instruction.
    if (NameRef.count("vget") || NameRef.count("vset"))
      return;

    // We first normalize our proto, since we only need to emit 4
    // different types of checks, yet have more than 4 proto types
    // that map onto those 4 patterns.
    std::string NormalizedProto("");
    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
                                             HasLanePostfix, HasDupPostfix,
                                             NormalizedProto);

    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
      const char &c = NormalizedProto[i];
      switch (c) {
      case 'q':
        RegisterSuffix += "q{{[0-9]+}}, ";
        break;

      case 'd':
        RegisterSuffix += "d{{[0-9]+}}, ";
        break;

      case 'i':
        RegisterSuffix += "#{{[0-9]+}}, ";
        break;

      case 'a':
        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
        break;
      }
    }

    // Remove extra ", ".
    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
  }
}

/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {
  // NOTE(review): InTypeStr is never read in this function — presumably kept
  // for signature symmetry with callers; confirm before removing.

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // Special case with a hard-coded check pattern.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix = false;
  // Our instruction is a vcvt instruction which requires special handling.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // -1 wraps around to SIZE_MAX and acts as a "not a vtbl/vtbx" sentinel
  // until PreprocessInstruction fills it in.
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // More special cases, keyed off the output type code.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing. On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // Keep "vld"/"vst" from the name and append "1.64".
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}

/// UseMacro - Examine the prototype string to determine if the intrinsic
/// should be defined as a preprocessor macro instead of an inline function.
1422static bool UseMacro(const std::string &proto) { 1423 // If this builtin takes an immediate argument, we need to #define it rather 1424 // than use a standard declaration, so that SemaChecking can range check 1425 // the immediate passed by the user. 1426 if (proto.find('i') != std::string::npos) 1427 return true; 1428 1429 // Pointer arguments need to use macros to avoid hiding aligned attributes 1430 // from the pointer type. 1431 if (proto.find('p') != std::string::npos || 1432 proto.find('c') != std::string::npos) 1433 return true; 1434 1435 return false; 1436} 1437 1438/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is 1439/// defined as a macro should be accessed directly instead of being first 1440/// assigned to a local temporary. 1441static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) { 1442 // True for constant ints (i), pointers (p) and const pointers (c). 1443 return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c'); 1444} 1445 1446// Generate the string "(argtype a, argtype b, ...)" 1447static std::string GenArgs(const std::string &proto, StringRef typestr, 1448 const std::string &name) { 1449 bool define = UseMacro(proto); 1450 char arg = 'a'; 1451 1452 std::string s; 1453 s += "("; 1454 1455 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1456 if (define) { 1457 // Some macro arguments are used directly instead of being assigned 1458 // to local temporaries; prepend an underscore prefix to make their 1459 // names consistent with the local temporaries. 1460 if (MacroArgUsedDirectly(proto, i)) 1461 s += "__"; 1462 } else { 1463 s += TypeString(proto[i], typestr) + " __"; 1464 } 1465 s.push_back(arg); 1466 //To avoid argument being multiple defined, add extra number for renaming. 
1467 if (name == "vcopy_lane" || name == "vcopy_laneq") 1468 s.push_back('1'); 1469 if ((i + 1) < e) 1470 s += ", "; 1471 } 1472 1473 s += ")"; 1474 return s; 1475} 1476 1477// Macro arguments are not type-checked like inline function arguments, so 1478// assign them to local temporaries to get the right type checking. 1479static std::string GenMacroLocals(const std::string &proto, StringRef typestr, 1480 const std::string &name ) { 1481 char arg = 'a'; 1482 std::string s; 1483 bool generatedLocal = false; 1484 1485 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 1486 // Do not create a temporary for an immediate argument. 1487 // That would defeat the whole point of using a macro! 1488 if (MacroArgUsedDirectly(proto, i)) 1489 continue; 1490 generatedLocal = true; 1491 bool extranumber = false; 1492 if (name == "vcopy_lane" || name == "vcopy_laneq") 1493 extranumber = true; 1494 1495 s += TypeString(proto[i], typestr) + " __"; 1496 s.push_back(arg); 1497 if(extranumber) 1498 s.push_back('1'); 1499 s += " = ("; 1500 s.push_back(arg); 1501 if(extranumber) 1502 s.push_back('1'); 1503 s += "); "; 1504 } 1505 1506 if (generatedLocal) 1507 s += "\\\n "; 1508 return s; 1509} 1510 1511// Use the vmovl builtin to sign-extend or zero-extend a vector. 1512static std::string Extend(StringRef typestr, const std::string &a, bool h=0) { 1513 std::string s, high; 1514 high = h ? "_high" : ""; 1515 s = MangleName("vmovl" + high, typestr, ClassS); 1516 s += "(" + a + ")"; 1517 return s; 1518} 1519 1520// Get the high 64-bit part of a vector 1521static std::string GetHigh(const std::string &a, StringRef typestr) { 1522 std::string s; 1523 s = MangleName("vget_high", typestr, ClassS); 1524 s += "(" + a + ")"; 1525 return s; 1526} 1527 1528// Gen operation with two operands and get high 64-bit for both of two operands. 
1529static std::string Gen2OpWith2High(StringRef typestr, 1530 const std::string &op, 1531 const std::string &a, 1532 const std::string &b) { 1533 std::string s; 1534 std::string Op1 = GetHigh(a, typestr); 1535 std::string Op2 = GetHigh(b, typestr); 1536 s = MangleName(op, typestr, ClassS); 1537 s += "(" + Op1 + ", " + Op2 + ");"; 1538 return s; 1539} 1540 1541// Gen operation with three operands and get high 64-bit of the latter 1542// two operands. 1543static std::string Gen3OpWith2High(StringRef typestr, 1544 const std::string &op, 1545 const std::string &a, 1546 const std::string &b, 1547 const std::string &c) { 1548 std::string s; 1549 std::string Op1 = GetHigh(b, typestr); 1550 std::string Op2 = GetHigh(c, typestr); 1551 s = MangleName(op, typestr, ClassS); 1552 s += "(" + a + ", " + Op1 + ", " + Op2 + ");"; 1553 return s; 1554} 1555 1556// Gen combine operation by putting a on low 64-bit, and b on high 64-bit. 1557static std::string GenCombine(std::string typestr, 1558 const std::string &a, 1559 const std::string &b) { 1560 std::string s; 1561 s = MangleName("vcombine", typestr, ClassS); 1562 s += "(" + a + ", " + b + ")"; 1563 return s; 1564} 1565 1566static std::string Duplicate(unsigned nElts, StringRef typestr, 1567 const std::string &a) { 1568 std::string s; 1569 1570 s = "(" + TypeString('d', typestr) + "){ "; 1571 for (unsigned i = 0; i != nElts; ++i) { 1572 s += a; 1573 if ((i + 1) < nElts) 1574 s += ", "; 1575 } 1576 s += " }"; 1577 1578 return s; 1579} 1580 1581static std::string SplatLane(unsigned nElts, const std::string &vec, 1582 const std::string &lane) { 1583 std::string s = "__builtin_shufflevector(" + vec + ", " + vec; 1584 for (unsigned i = 0; i < nElts; ++i) 1585 s += ", " + lane; 1586 s += ")"; 1587 return s; 1588} 1589 1590static std::string RemoveHigh(const std::string &name) { 1591 std::string s = name; 1592 std::size_t found = s.find("_high_"); 1593 if (found == std::string::npos) 1594 PrintFatalError("name should contain \"_high_\" 
for high intrinsics"); 1595 s.replace(found, 5, ""); 1596 return s; 1597} 1598 1599static unsigned GetNumElements(StringRef typestr, bool &quad) { 1600 quad = false; 1601 bool dummy = false; 1602 char type = ClassifyType(typestr, quad, dummy, dummy); 1603 unsigned nElts = 0; 1604 switch (type) { 1605 case 'c': nElts = 8; break; 1606 case 's': nElts = 4; break; 1607 case 'i': nElts = 2; break; 1608 case 'l': nElts = 1; break; 1609 case 'h': nElts = 4; break; 1610 case 'f': nElts = 2; break; 1611 case 'd': 1612 nElts = 1; 1613 break; 1614 default: 1615 PrintFatalError("unhandled type!"); 1616 } 1617 if (quad) nElts <<= 1; 1618 return nElts; 1619} 1620 1621// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd. 1622static std::string GenOpString(const std::string &name, OpKind op, 1623 const std::string &proto, StringRef typestr) { 1624 bool quad; 1625 unsigned nElts = GetNumElements(typestr, quad); 1626 bool define = UseMacro(proto); 1627 1628 std::string ts = TypeString(proto[0], typestr); 1629 std::string s; 1630 if (!define) { 1631 s = "return "; 1632 } 1633 1634 switch(op) { 1635 case OpAdd: 1636 s += "__a + __b;"; 1637 break; 1638 case OpAddl: 1639 s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";"; 1640 break; 1641 case OpAddlHi: 1642 s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";"; 1643 break; 1644 case OpAddw: 1645 s += "__a + " + Extend(typestr, "__b") + ";"; 1646 break; 1647 case OpAddwHi: 1648 s += "__a + " + Extend(typestr, "__b", 1) + ";"; 1649 break; 1650 case OpSub: 1651 s += "__a - __b;"; 1652 break; 1653 case OpSubl: 1654 s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";"; 1655 break; 1656 case OpSublHi: 1657 s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";"; 1658 break; 1659 case OpSubw: 1660 s += "__a - " + Extend(typestr, "__b") + ";"; 1661 break; 1662 case OpSubwHi: 1663 s += "__a - " + Extend(typestr, "__b", 1) + ";"; 1664 break; 1665 case OpMulN: 1666 
s += "__a * " + Duplicate(nElts, typestr, "__b") + ";"; 1667 break; 1668 case OpMulLane: 1669 s += "__a * " + SplatLane(nElts, "__b", "__c") + ";"; 1670 break; 1671 case OpMulXLane: 1672 s += MangleName("vmulx", typestr, ClassS) + "(__a, " + 1673 SplatLane(nElts, "__b", "__c") + ");"; 1674 break; 1675 case OpMul: 1676 s += "__a * __b;"; 1677 break; 1678 case OpFMlaN: 1679 s += MangleName("vfma", typestr, ClassS); 1680 s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");"; 1681 break; 1682 case OpFMlsN: 1683 s += MangleName("vfms", typestr, ClassS); 1684 s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");"; 1685 break; 1686 case OpMullLane: 1687 s += MangleName("vmull", typestr, ClassS) + "(__a, " + 1688 SplatLane(nElts, "__b", "__c") + ");"; 1689 break; 1690 case OpMullHiLane: 1691 s += MangleName("vmull", typestr, ClassS) + "(" + 1692 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1693 break; 1694 case OpMlaN: 1695 s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1696 break; 1697 case OpMlaLane: 1698 s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1699 break; 1700 case OpMla: 1701 s += "__a + (__b * __c);"; 1702 break; 1703 case OpMlalN: 1704 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1705 Duplicate(nElts, typestr, "__c") + ");"; 1706 break; 1707 case OpMlalLane: 1708 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1709 SplatLane(nElts, "__c", "__d") + ");"; 1710 break; 1711 case OpMlalHiLane: 1712 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" + 1713 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1714 break; 1715 case OpMlal: 1716 s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1717 break; 1718 case OpMullHi: 1719 s += Gen2OpWith2High(typestr, "vmull", "__a", "__b"); 1720 break; 1721 case OpMullHiN: 1722 s += MangleName("vmull_n", typestr, ClassS); 1723 s += "(" + GetHigh("__a", typestr) + ", 
__b);"; 1724 return s; 1725 case OpMlalHi: 1726 s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c"); 1727 break; 1728 case OpMlalHiN: 1729 s += MangleName("vmlal_n", typestr, ClassS); 1730 s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; 1731 return s; 1732 case OpMlsN: 1733 s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");"; 1734 break; 1735 case OpMlsLane: 1736 s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");"; 1737 break; 1738 case OpFMSLane: 1739 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1740 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1741 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1742 s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1743 break; 1744 case OpFMSLaneQ: 1745 s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n "; 1746 s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n "; 1747 s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n "; 1748 s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);"; 1749 break; 1750 case OpMls: 1751 s += "__a - (__b * __c);"; 1752 break; 1753 case OpMlslN: 1754 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1755 Duplicate(nElts, typestr, "__c") + ");"; 1756 break; 1757 case OpMlslLane: 1758 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " + 1759 SplatLane(nElts, "__c", "__d") + ");"; 1760 break; 1761 case OpMlslHiLane: 1762 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" + 1763 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1764 break; 1765 case OpMlsl: 1766 s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);"; 1767 break; 1768 case OpMlslHi: 1769 s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c"); 1770 break; 1771 case OpMlslHiN: 1772 s += MangleName("vmlsl_n", typestr, ClassS); 1773 s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; 1774 break; 1775 case OpQDMullLane: 1776 s += 
MangleName("vqdmull", typestr, ClassS) + "(__a, " + 1777 SplatLane(nElts, "__b", "__c") + ");"; 1778 break; 1779 case OpQDMullHiLane: 1780 s += MangleName("vqdmull", typestr, ClassS) + "(" + 1781 GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");"; 1782 break; 1783 case OpQDMlalLane: 1784 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " + 1785 SplatLane(nElts, "__c", "__d") + ");"; 1786 break; 1787 case OpQDMlalHiLane: 1788 s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " + 1789 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1790 break; 1791 case OpQDMlslLane: 1792 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " + 1793 SplatLane(nElts, "__c", "__d") + ");"; 1794 break; 1795 case OpQDMlslHiLane: 1796 s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " + 1797 GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");"; 1798 break; 1799 case OpQDMulhLane: 1800 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 1801 SplatLane(nElts, "__b", "__c") + ");"; 1802 break; 1803 case OpQRDMulhLane: 1804 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 1805 SplatLane(nElts, "__b", "__c") + ");"; 1806 break; 1807 case OpEq: 1808 s += "(" + ts + ")(__a == __b);"; 1809 break; 1810 case OpGe: 1811 s += "(" + ts + ")(__a >= __b);"; 1812 break; 1813 case OpLe: 1814 s += "(" + ts + ")(__a <= __b);"; 1815 break; 1816 case OpGt: 1817 s += "(" + ts + ")(__a > __b);"; 1818 break; 1819 case OpLt: 1820 s += "(" + ts + ")(__a < __b);"; 1821 break; 1822 case OpNeg: 1823 s += " -__a;"; 1824 break; 1825 case OpNot: 1826 s += " ~__a;"; 1827 break; 1828 case OpAnd: 1829 s += "__a & __b;"; 1830 break; 1831 case OpOr: 1832 s += "__a | __b;"; 1833 break; 1834 case OpXor: 1835 s += "__a ^ __b;"; 1836 break; 1837 case OpAndNot: 1838 s += "__a & ~__b;"; 1839 break; 1840 case OpOrNot: 1841 s += "__a | ~__b;"; 1842 break; 1843 case OpCast: 1844 s += "(" + ts + ")__a;"; 1845 break; 1846 case OpConcat: 
1847 s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a"; 1848 s += ", (int64x1_t)__b, 0, 1);"; 1849 break; 1850 case OpHi: 1851 // nElts is for the result vector, so the source is twice that number. 1852 s += "__builtin_shufflevector(__a, __a"; 1853 for (unsigned i = nElts; i < nElts * 2; ++i) 1854 s += ", " + utostr(i); 1855 s+= ");"; 1856 break; 1857 case OpLo: 1858 s += "__builtin_shufflevector(__a, __a"; 1859 for (unsigned i = 0; i < nElts; ++i) 1860 s += ", " + utostr(i); 1861 s+= ");"; 1862 break; 1863 case OpDup: 1864 s += Duplicate(nElts, typestr, "__a") + ";"; 1865 break; 1866 case OpDupLane: 1867 s += SplatLane(nElts, "__a", "__b") + ";"; 1868 break; 1869 case OpSelect: 1870 // ((0 & 1) | (~0 & 2)) 1871 s += "(" + ts + ")"; 1872 ts = TypeString(proto[1], typestr); 1873 s += "((__a & (" + ts + ")__b) | "; 1874 s += "(~__a & (" + ts + ")__c));"; 1875 break; 1876 case OpRev16: 1877 s += "__builtin_shufflevector(__a, __a"; 1878 for (unsigned i = 2; i <= nElts; i += 2) 1879 for (unsigned j = 0; j != 2; ++j) 1880 s += ", " + utostr(i - j - 1); 1881 s += ");"; 1882 break; 1883 case OpRev32: { 1884 unsigned WordElts = nElts >> (1 + (int)quad); 1885 s += "__builtin_shufflevector(__a, __a"; 1886 for (unsigned i = WordElts; i <= nElts; i += WordElts) 1887 for (unsigned j = 0; j != WordElts; ++j) 1888 s += ", " + utostr(i - j - 1); 1889 s += ");"; 1890 break; 1891 } 1892 case OpRev64: { 1893 unsigned DblWordElts = nElts >> (int)quad; 1894 s += "__builtin_shufflevector(__a, __a"; 1895 for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts) 1896 for (unsigned j = 0; j != DblWordElts; ++j) 1897 s += ", " + utostr(i - j - 1); 1898 s += ");"; 1899 break; 1900 } 1901 case OpXtnHi: { 1902 s = TypeString(proto[1], typestr) + " __a1 = " + 1903 MangleName("vmovn", typestr, ClassS) + "(__b);\n " + 1904 "return __builtin_shufflevector(__a, __a1"; 1905 for (unsigned i = 0; i < nElts * 4; ++i) 1906 s += ", " + utostr(i); 1907 s += ");"; 1908 break; 1909 } 1910 case 
OpSqxtunHi: { 1911 s = TypeString(proto[1], typestr) + " __a1 = " + 1912 MangleName("vqmovun", typestr, ClassS) + "(__b);\n " + 1913 "return __builtin_shufflevector(__a, __a1"; 1914 for (unsigned i = 0; i < nElts * 4; ++i) 1915 s += ", " + utostr(i); 1916 s += ");"; 1917 break; 1918 } 1919 case OpQxtnHi: { 1920 s = TypeString(proto[1], typestr) + " __a1 = " + 1921 MangleName("vqmovn", typestr, ClassS) + "(__b);\n " + 1922 "return __builtin_shufflevector(__a, __a1"; 1923 for (unsigned i = 0; i < nElts * 4; ++i) 1924 s += ", " + utostr(i); 1925 s += ");"; 1926 break; 1927 } 1928 case OpFcvtnHi: { 1929 std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16"; 1930 s = TypeString(proto[1], typestr) + " __a1 = " + 1931 MangleName(FName, typestr, ClassS) + "(__b);\n " + 1932 "return __builtin_shufflevector(__a, __a1"; 1933 for (unsigned i = 0; i < nElts * 4; ++i) 1934 s += ", " + utostr(i); 1935 s += ");"; 1936 break; 1937 } 1938 case OpFcvtlHi: { 1939 std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32"; 1940 s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) + 1941 ";\n return " + MangleName(FName, typestr, ClassS) + "(__a1);"; 1942 break; 1943 } 1944 case OpFcvtxnHi: { 1945 s = TypeString(proto[1], typestr) + " __a1 = " + 1946 MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n " + 1947 "return __builtin_shufflevector(__a, __a1"; 1948 for (unsigned i = 0; i < nElts * 4; ++i) 1949 s += ", " + utostr(i); 1950 s += ");"; 1951 break; 1952 } 1953 case OpUzp1: 1954 s += "__builtin_shufflevector(__a, __b"; 1955 for (unsigned i = 0; i < nElts; i++) 1956 s += ", " + utostr(2*i); 1957 s += ");"; 1958 break; 1959 case OpUzp2: 1960 s += "__builtin_shufflevector(__a, __b"; 1961 for (unsigned i = 0; i < nElts; i++) 1962 s += ", " + utostr(2*i+1); 1963 s += ");"; 1964 break; 1965 case OpZip1: 1966 s += "__builtin_shufflevector(__a, __b"; 1967 for (unsigned i = 0; i < (nElts/2); i++) 1968 s += ", " + utostr(i) + ", " + utostr(i+nElts); 1969 s += ");"; 1970 
break; 1971 case OpZip2: 1972 s += "__builtin_shufflevector(__a, __b"; 1973 for (unsigned i = nElts/2; i < nElts; i++) 1974 s += ", " + utostr(i) + ", " + utostr(i+nElts); 1975 s += ");"; 1976 break; 1977 case OpTrn1: 1978 s += "__builtin_shufflevector(__a, __b"; 1979 for (unsigned i = 0; i < (nElts/2); i++) 1980 s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts); 1981 s += ");"; 1982 break; 1983 case OpTrn2: 1984 s += "__builtin_shufflevector(__a, __b"; 1985 for (unsigned i = 0; i < (nElts/2); i++) 1986 s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts); 1987 s += ");"; 1988 break; 1989 case OpAbdl: { 1990 std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)"; 1991 if (typestr[0] != 'U') { 1992 // vabd results are always unsigned and must be zero-extended. 1993 std::string utype = "U" + typestr.str(); 1994 s += "(" + TypeString(proto[0], typestr) + ")"; 1995 abd = "(" + TypeString('d', utype) + ")" + abd; 1996 s += Extend(utype, abd) + ";"; 1997 } else { 1998 s += Extend(typestr, abd) + ";"; 1999 } 2000 break; 2001 } 2002 case OpAbdlHi: 2003 s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b"); 2004 break; 2005 case OpAddhnHi: { 2006 std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)"; 2007 s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn); 2008 s += ";"; 2009 break; 2010 } 2011 case OpRAddhnHi: { 2012 std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)"; 2013 s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn); 2014 s += ";"; 2015 break; 2016 } 2017 case OpSubhnHi: { 2018 std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)"; 2019 s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn); 2020 s += ";"; 2021 break; 2022 } 2023 case OpRSubhnHi: { 2024 std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)"; 2025 s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn); 2026 s += ";"; 2027 break; 2028 } 2029 case OpAba: 2030 s += "__a + " + 
MangleName("vabd", typestr, ClassS) + "(__b, __c);"; 2031 break; 2032 case OpAbal: 2033 s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);"; 2034 break; 2035 case OpAbalHi: 2036 s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c"); 2037 break; 2038 case OpQDMullHi: 2039 s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b"); 2040 break; 2041 case OpQDMullHiN: 2042 s += MangleName("vqdmull_n", typestr, ClassS); 2043 s += "(" + GetHigh("__a", typestr) + ", __b);"; 2044 return s; 2045 case OpQDMlalHi: 2046 s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c"); 2047 break; 2048 case OpQDMlalHiN: 2049 s += MangleName("vqdmlal_n", typestr, ClassS); 2050 s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; 2051 return s; 2052 case OpQDMlslHi: 2053 s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c"); 2054 break; 2055 case OpQDMlslHiN: 2056 s += MangleName("vqdmlsl_n", typestr, ClassS); 2057 s += "(__a, " + GetHigh("__b", typestr) + ", __c);"; 2058 return s; 2059 case OpDiv: 2060 s += "__a / __b;"; 2061 break; 2062 case OpMovlHi: { 2063 s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 2064 MangleName("vget_high", typestr, ClassS) + "(__a);\n " + s; 2065 s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS); 2066 s += "(__a1, 0);"; 2067 break; 2068 } 2069 case OpLongHi: { 2070 // Another local variable __a1 is needed for calling a Macro, 2071 // or using __a will have naming conflict when Macro expanding. 
2072 s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " + 2073 MangleName("vget_high", typestr, ClassS) + "(__a); \\\n"; 2074 s += " (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) + 2075 "(__a1, __b);"; 2076 break; 2077 } 2078 case OpNarrowHi: { 2079 s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " + 2080 MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));"; 2081 break; 2082 } 2083 case OpCopyLane: { 2084 s += TypeString('s', typestr) + " __c2 = " + 2085 MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " + 2086 MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);"; 2087 break; 2088 } 2089 case OpCopyQLane: { 2090 std::string typeCode = ""; 2091 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2092 s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode + 2093 "(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);"; 2094 break; 2095 } 2096 case OpCopyLaneQ: { 2097 std::string typeCode = ""; 2098 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2099 s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode + 2100 "(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);"; 2101 break; 2102 } 2103 case OpScalarMulLane: { 2104 std::string typeCode = ""; 2105 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2106 s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + 2107 "(__b, __c);\\\n __a * __d1;"; 2108 break; 2109 } 2110 case OpScalarMulLaneQ: { 2111 std::string typeCode = ""; 2112 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2113 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode + 2114 "(__b, __c);\\\n __a * __d1;"; 2115 break; 2116 } 2117 case OpScalarMulXLane: { 2118 bool dummy = false; 2119 char type = ClassifyType(typestr, dummy, dummy, dummy); 2120 if (type == 'f') type = 's'; 2121 std::string typeCode = ""; 2122 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2123 s += TypeString('s', 
typestr) + " __d1 = vget_lane_" + typeCode + 2124 "(__b, __c);\\\n vmulx" + type + "_" + 2125 typeCode + "(__a, __d1);"; 2126 break; 2127 } 2128 case OpScalarMulXLaneQ: { 2129 bool dummy = false; 2130 char type = ClassifyType(typestr, dummy, dummy, dummy); 2131 if (type == 'f') type = 's'; 2132 std::string typeCode = ""; 2133 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2134 s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + 2135 typeCode + "(__b, __c);\\\n vmulx" + type + 2136 "_" + typeCode + "(__a, __d1);"; 2137 break; 2138 } 2139 2140 case OpScalarVMulXLane: { 2141 bool dummy = false; 2142 char type = ClassifyType(typestr, dummy, dummy, dummy); 2143 if (type == 'f') type = 's'; 2144 std::string typeCode = ""; 2145 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2146 s += TypeString('s', typestr) + " __d1 = vget_lane_" + 2147 typeCode + "(__a, 0);\\\n" + 2148 " " + TypeString('s', typestr) + " __e1 = vget_lane_" + 2149 typeCode + "(__b, __c);\\\n" + 2150 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + 2151 typeCode + "(__d1, __e1);\\\n" + 2152 " " + TypeString('d', typestr) + " __g1;\\\n" + 2153 " vset_lane_" + typeCode + "(__f1, __g1, __c);"; 2154 break; 2155 } 2156 2157 case OpScalarVMulXLaneQ: { 2158 bool dummy = false; 2159 char type = ClassifyType(typestr, dummy, dummy, dummy); 2160 if (type == 'f') type = 's'; 2161 std::string typeCode = ""; 2162 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2163 s += TypeString('s', typestr) + " __d1 = vget_lane_" + 2164 typeCode + "(__a, 0);\\\n" + 2165 " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" + 2166 typeCode + "(__b, __c);\\\n" + 2167 " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" + 2168 typeCode + "(__d1, __e1);\\\n" + 2169 " " + TypeString('d', typestr) + " __g1;\\\n" + 2170 " vset_lane_" + typeCode + "(__f1, __g1, 0);"; 2171 break; 2172 } 2173 case OpScalarQDMullLane: { 2174 std::string typeCode = ""; 2175 InstructionTypeCode(typestr, ClassS, 
quad, typeCode); 2176 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 2177 "vget_lane_" + typeCode + "(b, __c));"; 2178 break; 2179 } 2180 case OpScalarQDMullLaneQ: { 2181 std::string typeCode = ""; 2182 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2183 s += MangleName("vqdmull", typestr, ClassS) + "(__a, " + 2184 "vgetq_lane_" + typeCode + "(b, __c));"; 2185 break; 2186 } 2187 case OpScalarQDMulHiLane: { 2188 std::string typeCode = ""; 2189 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2190 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 2191 "vget_lane_" + typeCode + "(__b, __c));"; 2192 break; 2193 } 2194 case OpScalarQDMulHiLaneQ: { 2195 std::string typeCode = ""; 2196 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2197 s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " + 2198 "vgetq_lane_" + typeCode + "(__b, __c));"; 2199 break; 2200 } 2201 case OpScalarQRDMulHiLane: { 2202 std::string typeCode = ""; 2203 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2204 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 2205 "vget_lane_" + typeCode + "(__b, __c));"; 2206 break; 2207 } 2208 case OpScalarQRDMulHiLaneQ: { 2209 std::string typeCode = ""; 2210 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2211 s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " + 2212 "vgetq_lane_" + typeCode + "(__b, __c));"; 2213 break; 2214 } 2215 case OpScalarGetLane:{ 2216 std::string typeCode = ""; 2217 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2218 if (quad) { 2219 s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n"; 2220 s += " vgetq_lane_s16(__a1, __b);"; 2221 } else { 2222 s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n"; 2223 s += " vget_lane_s16(__a1, __b);"; 2224 } 2225 break; 2226 } 2227 case OpScalarSetLane:{ 2228 std::string typeCode = ""; 2229 InstructionTypeCode(typestr, ClassS, quad, typeCode); 2230 s += "int16_t __a1 = (int16_t)__a;\\\n"; 2231 if (quad) { 2232 s += " int16x8_t __b1 
= vreinterpretq_s16_f16(b);\\\n"; 2233 s += " int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n"; 2234 s += " vreinterpretq_f16_s16(__b2);"; 2235 } else { 2236 s += " int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n"; 2237 s += " int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n"; 2238 s += " vreinterpret_f16_s16(__b2);"; 2239 } 2240 break; 2241 } 2242 2243 default: 2244 PrintFatalError("unknown OpKind!"); 2245 } 2246 return s; 2247} 2248 2249static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { 2250 unsigned mod = proto[0]; 2251 2252 if (mod == 'v' || mod == 'f' || mod == 'F') 2253 mod = proto[1]; 2254 2255 bool quad = false; 2256 bool poly = false; 2257 bool usgn = false; 2258 bool scal = false; 2259 bool cnst = false; 2260 bool pntr = false; 2261 2262 // Base type to get the type string for. 2263 char type = ClassifyType(typestr, quad, poly, usgn); 2264 2265 // Based on the modifying character, change the type and width if necessary. 2266 type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr); 2267 2268 NeonTypeFlags::EltType ET; 2269 switch (type) { 2270 case 'c': 2271 ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8; 2272 break; 2273 case 's': 2274 ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16; 2275 break; 2276 case 'i': 2277 ET = NeonTypeFlags::Int32; 2278 break; 2279 case 'l': 2280 ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64; 2281 break; 2282 case 'h': 2283 ET = NeonTypeFlags::Float16; 2284 break; 2285 case 'f': 2286 ET = NeonTypeFlags::Float32; 2287 break; 2288 case 'd': 2289 ET = NeonTypeFlags::Float64; 2290 break; 2291 default: 2292 PrintFatalError("unhandled type!"); 2293 } 2294 NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g'); 2295 return Flags.getFlags(); 2296} 2297 2298// We don't check 'a' in this function, because for builtin function the 2299// argument matching to 'a' uses a vector type splatted from a scalar type. 
2300static bool ProtoHasScalar(const std::string proto) 2301{ 2302 return (proto.find('s') != std::string::npos 2303 || proto.find('z') != std::string::npos 2304 || proto.find('r') != std::string::npos 2305 || proto.find('b') != std::string::npos 2306 || proto.find('$') != std::string::npos 2307 || proto.find('y') != std::string::npos 2308 || proto.find('o') != std::string::npos); 2309} 2310 2311// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a) 2312static std::string GenBuiltin(const std::string &name, const std::string &proto, 2313 StringRef typestr, ClassKind ck) { 2314 std::string s; 2315 2316 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit 2317 // sret-like argument. 2318 bool sret = IsMultiVecProto(proto[0]); 2319 2320 bool define = UseMacro(proto); 2321 2322 // Check if the prototype has a scalar operand with the type of the vector 2323 // elements. If not, bitcasting the args will take care of arg checking. 2324 // The actual signedness etc. will be taken care of with special enums. 2325 if (!ProtoHasScalar(proto)) 2326 ck = ClassB; 2327 2328 if (proto[0] != 'v') { 2329 std::string ts = TypeString(proto[0], typestr); 2330 2331 if (define) { 2332 if (sret) 2333 s += ts + " r; "; 2334 else 2335 s += "(" + ts + ")"; 2336 } else if (sret) { 2337 s += ts + " r; "; 2338 } else { 2339 s += "return (" + ts + ")"; 2340 } 2341 } 2342 2343 bool splat = proto.find('a') != std::string::npos; 2344 2345 s += "__builtin_neon_"; 2346 if (splat) { 2347 // Call the non-splat builtin: chop off the "_n" suffix from the name. 2348 std::string vname(name, 0, name.size()-2); 2349 s += MangleName(vname, typestr, ck); 2350 } else { 2351 s += MangleName(name, typestr, ck); 2352 } 2353 s += "("; 2354 2355 // Pass the address of the return variable as the first argument to sret-like 2356 // builtins. 
2357 if (sret) 2358 s += "&r, "; 2359 2360 char arg = 'a'; 2361 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 2362 std::string args = std::string(&arg, 1); 2363 2364 // Use the local temporaries instead of the macro arguments. 2365 args = "__" + args; 2366 2367 bool argQuad = false; 2368 bool argPoly = false; 2369 bool argUsgn = false; 2370 bool argScalar = false; 2371 bool dummy = false; 2372 char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn); 2373 argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar, 2374 dummy, dummy); 2375 2376 // Handle multiple-vector values specially, emitting each subvector as an 2377 // argument to the __builtin. 2378 unsigned NumOfVec = 0; 2379 if (proto[i] >= '2' && proto[i] <= '4') { 2380 NumOfVec = proto[i] - '0'; 2381 } else if (proto[i] >= 'B' && proto[i] <= 'D') { 2382 NumOfVec = proto[i] - 'A' + 1; 2383 } 2384 2385 if (NumOfVec > 0) { 2386 // Check if an explicit cast is needed. 2387 if (argType != 'c' || argPoly || argUsgn) 2388 args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args; 2389 2390 for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) { 2391 s += args + ".val[" + utostr(vi) + "]"; 2392 if ((vi + 1) < ve) 2393 s += ", "; 2394 } 2395 if ((i + 1) < e) 2396 s += ", "; 2397 2398 continue; 2399 } 2400 2401 if (splat && (i + 1) == e) 2402 args = Duplicate(GetNumElements(typestr, argQuad), typestr, args); 2403 2404 // Check if an explicit cast is needed. 2405 if ((splat || !argScalar) && 2406 ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) { 2407 std::string argTypeStr = "c"; 2408 if (ck != ClassB) 2409 argTypeStr = argType; 2410 if (argQuad) 2411 argTypeStr = "Q" + argTypeStr; 2412 args = "(" + TypeString('d', argTypeStr) + ")" + args; 2413 } 2414 2415 s += args; 2416 if ((i + 1) < e) 2417 s += ", "; 2418 } 2419 2420 // Extra constant integer to hold type class enum for this function, e.g. 
s8 2421 if (ck == ClassB) 2422 s += ", " + utostr(GetNeonEnum(proto, typestr)); 2423 2424 s += ");"; 2425 2426 if (proto[0] != 'v' && sret) { 2427 if (define) 2428 s += " r;"; 2429 else 2430 s += " return r;"; 2431 } 2432 return s; 2433} 2434 2435static std::string GenBuiltinDef(const std::string &name, 2436 const std::string &proto, 2437 StringRef typestr, ClassKind ck) { 2438 std::string s("BUILTIN(__builtin_neon_"); 2439 2440 // If all types are the same size, bitcasting the args will take care 2441 // of arg checking. The actual signedness etc. will be taken care of with 2442 // special enums. 2443 if (!ProtoHasScalar(proto)) 2444 ck = ClassB; 2445 2446 s += MangleName(name, typestr, ck); 2447 s += ", \""; 2448 2449 for (unsigned i = 0, e = proto.size(); i != e; ++i) 2450 s += BuiltinTypeString(proto[i], typestr, ck, i == 0); 2451 2452 // Extra constant integer to hold type class enum for this function, e.g. s8 2453 if (ck == ClassB) 2454 s += "i"; 2455 2456 s += "\", \"n\")"; 2457 return s; 2458} 2459 2460static std::string GenIntrinsic(const std::string &name, 2461 const std::string &proto, 2462 StringRef outTypeStr, StringRef inTypeStr, 2463 OpKind kind, ClassKind classKind) { 2464 assert(!proto.empty() && ""); 2465 bool define = UseMacro(proto) && kind != OpUnavailable; 2466 std::string s; 2467 2468 // static always inline + return type 2469 if (define) 2470 s += "#define "; 2471 else 2472 s += "__ai " + TypeString(proto[0], outTypeStr) + " "; 2473 2474 // Function name with type suffix 2475 std::string mangledName = MangleName(name, outTypeStr, ClassS); 2476 if (outTypeStr != inTypeStr) { 2477 // If the input type is different (e.g., for vreinterpret), append a suffix 2478 // for the input type. String off a "Q" (quad) prefix so that MangleName 2479 // does not insert another "q" in the name. 2480 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 
1 : 0); 2481 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 2482 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 2483 } 2484 s += mangledName; 2485 2486 // Function arguments 2487 s += GenArgs(proto, inTypeStr, name); 2488 2489 // Definition. 2490 if (define) { 2491 s += " __extension__ ({ \\\n "; 2492 s += GenMacroLocals(proto, inTypeStr, name); 2493 } else if (kind == OpUnavailable) { 2494 s += " __attribute__((unavailable));\n"; 2495 return s; 2496 } else 2497 s += " {\n "; 2498 2499 if (kind != OpNone) 2500 s += GenOpString(name, kind, proto, outTypeStr); 2501 else 2502 s += GenBuiltin(name, proto, outTypeStr, classKind); 2503 if (define) 2504 s += " })"; 2505 else 2506 s += " }"; 2507 s += "\n"; 2508 return s; 2509} 2510 2511/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h 2512/// is comprised of type definitions and function declarations. 2513void NeonEmitter::run(raw_ostream &OS) { 2514 OS << 2515 "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------" 2516 "---===\n" 2517 " *\n" 2518 " * Permission is hereby granted, free of charge, to any person obtaining " 2519 "a copy\n" 2520 " * of this software and associated documentation files (the \"Software\")," 2521 " to deal\n" 2522 " * in the Software without restriction, including without limitation the " 2523 "rights\n" 2524 " * to use, copy, modify, merge, publish, distribute, sublicense, " 2525 "and/or sell\n" 2526 " * copies of the Software, and to permit persons to whom the Software is\n" 2527 " * furnished to do so, subject to the following conditions:\n" 2528 " *\n" 2529 " * The above copyright notice and this permission notice shall be " 2530 "included in\n" 2531 " * all copies or substantial portions of the Software.\n" 2532 " *\n" 2533 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, " 2534 "EXPRESS OR\n" 2535 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF " 2536 "MERCHANTABILITY,\n" 2537 " * FITNESS 
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT " 2538 "SHALL THE\n" 2539 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR " 2540 "OTHER\n" 2541 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, " 2542 "ARISING FROM,\n" 2543 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER " 2544 "DEALINGS IN\n" 2545 " * THE SOFTWARE.\n" 2546 " *\n" 2547 " *===--------------------------------------------------------------------" 2548 "---===\n" 2549 " */\n\n"; 2550 2551 OS << "#ifndef __ARM_NEON_H\n"; 2552 OS << "#define __ARM_NEON_H\n\n"; 2553 2554 OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n"; 2555 OS << "#error \"NEON support not enabled\"\n"; 2556 OS << "#endif\n\n"; 2557 2558 OS << "#include <stdint.h>\n\n"; 2559 2560 // Emit NEON-specific scalar typedefs. 2561 OS << "typedef float float32_t;\n"; 2562 OS << "typedef __fp16 float16_t;\n"; 2563 2564 OS << "#ifdef __aarch64__\n"; 2565 OS << "typedef double float64_t;\n"; 2566 OS << "#endif\n\n"; 2567 2568 // For now, signedness of polynomial types depends on target 2569 OS << "#ifdef __aarch64__\n"; 2570 OS << "typedef uint8_t poly8_t;\n"; 2571 OS << "typedef uint16_t poly16_t;\n"; 2572 OS << "typedef uint64_t poly64_t;\n"; 2573 OS << "#else\n"; 2574 OS << "typedef int8_t poly8_t;\n"; 2575 OS << "typedef int16_t poly16_t;\n"; 2576 OS << "#endif\n"; 2577 2578 // Emit Neon vector typedefs. 2579 std::string TypedefTypes( 2580 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl"); 2581 SmallVector<StringRef, 24> TDTypeVec; 2582 ParseTypes(0, TypedefTypes, TDTypeVec); 2583 2584 // Emit vector typedefs. 
2585 bool isA64 = false; 2586 bool preinsert; 2587 bool postinsert; 2588 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 2589 bool dummy, quad = false, poly = false; 2590 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); 2591 preinsert = false; 2592 postinsert = false; 2593 2594 if (type == 'd' || (type == 'l' && poly)) { 2595 preinsert = isA64? false: true; 2596 isA64 = true; 2597 } else { 2598 postinsert = isA64? true: false; 2599 isA64 = false; 2600 } 2601 if (postinsert) 2602 OS << "#endif\n"; 2603 if (preinsert) 2604 OS << "#ifdef __aarch64__\n"; 2605 2606 if (poly) 2607 OS << "typedef __attribute__((neon_polyvector_type("; 2608 else 2609 OS << "typedef __attribute__((neon_vector_type("; 2610 2611 unsigned nElts = GetNumElements(TDTypeVec[i], quad); 2612 OS << utostr(nElts) << "))) "; 2613 if (nElts < 10) 2614 OS << " "; 2615 2616 OS << TypeString('s', TDTypeVec[i]); 2617 OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; 2618 2619 } 2620 postinsert = isA64? true: false; 2621 if (postinsert) 2622 OS << "#endif\n"; 2623 OS << "\n"; 2624 2625 // Emit struct typedefs. 2626 isA64 = false; 2627 for (unsigned vi = 2; vi != 5; ++vi) { 2628 for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { 2629 bool dummy, quad = false, poly = false; 2630 char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); 2631 preinsert = false; 2632 postinsert = false; 2633 2634 if (type == 'd' || (type == 'l' && poly)) { 2635 preinsert = isA64? false: true; 2636 isA64 = true; 2637 } else { 2638 postinsert = isA64? 
true: false; 2639 isA64 = false; 2640 } 2641 if (postinsert) 2642 OS << "#endif\n"; 2643 if (preinsert) 2644 OS << "#ifdef __aarch64__\n"; 2645 2646 std::string ts = TypeString('d', TDTypeVec[i]); 2647 std::string vs = TypeString('0' + vi, TDTypeVec[i]); 2648 OS << "typedef struct " << vs << " {\n"; 2649 OS << " " << ts << " val"; 2650 OS << "[" << utostr(vi) << "]"; 2651 OS << ";\n} "; 2652 OS << vs << ";\n"; 2653 OS << "\n"; 2654 } 2655 } 2656 postinsert = isA64? true: false; 2657 if (postinsert) 2658 OS << "#endif\n"; 2659 OS << "\n"; 2660 2661 OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n"; 2662 2663 std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); 2664 2665 StringMap<ClassKind> EmittedMap; 2666 2667 // Emit vmovl, vmull and vabd intrinsics first so they can be used by other 2668 // intrinsics. (Some of the saturating multiply instructions are also 2669 // used to implement the corresponding "_lane" variants, but tablegen 2670 // sorts the records into alphabetical order so that the "_lane" variants 2671 // come after the intrinsics they use.) 2672 emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap); 2673 emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap); 2674 emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap); 2675 emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap); 2676 2677 // ARM intrinsics must be emitted before AArch64 intrinsics to ensure 2678 // common intrinsics appear only once in the output stream. 2679 // The check for uniquiness is done in emitIntrinsic. 2680 // Emit ARM intrinsics. 2681 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2682 Record *R = RV[i]; 2683 2684 // Skip AArch64 intrinsics; they will be emitted at the end. 
2685 bool isA64 = R->getValueAsBit("isA64"); 2686 if (isA64) 2687 continue; 2688 2689 if (R->getName() != "VMOVL" && R->getName() != "VMULL" && 2690 R->getName() != "VABD") 2691 emitIntrinsic(OS, R, EmittedMap); 2692 } 2693 2694 // Emit AArch64-specific intrinsics. 2695 OS << "#ifdef __aarch64__\n"; 2696 2697 emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap); 2698 emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap); 2699 emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap); 2700 2701 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2702 Record *R = RV[i]; 2703 2704 // Skip ARM intrinsics already included above. 2705 bool isA64 = R->getValueAsBit("isA64"); 2706 if (!isA64) 2707 continue; 2708 2709 // Skip crypto temporarily, and will emit them all together at the end. 2710 bool isCrypto = R->getValueAsBit("isCrypto"); 2711 if (isCrypto) 2712 continue; 2713 2714 emitIntrinsic(OS, R, EmittedMap); 2715 } 2716 2717 OS << "#ifdef __ARM_FEATURE_CRYPTO\n"; 2718 2719 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 2720 Record *R = RV[i]; 2721 2722 // Skip crypto temporarily, and will emit them all together at the end. 2723 bool isCrypto = R->getValueAsBit("isCrypto"); 2724 if (!isCrypto) 2725 continue; 2726 2727 emitIntrinsic(OS, R, EmittedMap); 2728 } 2729 2730 OS << "#endif\n\n"; 2731 2732 OS << "#endif\n\n"; 2733 2734 OS << "#undef __ai\n\n"; 2735 OS << "#endif /* __ARM_NEON_H */\n"; 2736} 2737 2738/// emitIntrinsic - Write out the arm_neon.h header file definitions for the 2739/// intrinsics specified by record R checking for intrinsic uniqueness. 
void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
                                StringMap<ClassKind> &EmittedMap) {
  std::string name = R->getValueAsString("Name");
  std::string Proto = R->getValueAsString("Prototype");
  std::string Types = R->getValueAsString("Types");

  SmallVector<StringRef, 16> TypeVec;
  ParseTypes(R, Types, TypeVec);

  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];

  ClassKind classKind = ClassNone;
  if (R->getSuperClasses().size() >= 2)
    classKind = ClassMap[R->getSuperClasses()[1]];
  if (classKind == ClassNone && kind == OpNone)
    PrintFatalError(R->getLoc(), "Builtin has no class kind");

  // Emit one definition per type in the record's type list.  EmittedMap is
  // keyed on the full generated definition text, so an intrinsic that both
  // ARM and AArch64 define identically is only written once.
  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
    if (kind == OpReinterpret) {
      // vreinterpret: emit a cast from every other type of the same width
      // (quad-ness must match) to this result type.
      bool outQuad = false;
      bool dummy = false;
      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
      for (unsigned srcti = 0, srcte = TypeVec.size();
           srcti != srcte; ++srcti) {
        bool inQuad = false;
        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
        if (srcti == ti || inQuad != outQuad)
          continue;
        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
                                     OpCast, ClassS);
        if (EmittedMap.count(s))
          continue;
        EmittedMap[s] = ClassS;
        OS << s;
      }
    } else {
      std::string s =
          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
      if (EmittedMap.count(s))
        continue;
      EmittedMap[s] = classKind;
      OS << s;
    }
  }
  OS << "\n";
}

/// RangeFromType - Return the maximum valid lane index (number of elements
/// minus one) for the vector type described by \p typestr after applying the
/// prototype modifier \p mod.
static unsigned RangeFromType(const char mod, StringRef typestr) {
  // base type to get the type string for.
  bool quad = false, dummy = false;
  char type = ClassifyType(typestr, quad, dummy, dummy);
  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);

  // Lane count doubles for quad (128-bit) vectors, hence the shift by quad.
  switch (type) {
  case 'c':
    return (8 << (int)quad) - 1;
  case 'h':
  case 's':
    return (4 << (int)quad) - 1;
  case 'f':
  case 'i':
    return (2 << (int)quad) - 1;
  case 'd':
  case 'l':
    return (1 << (int)quad) - 1;
  default:
    PrintFatalError("unhandled type!");
  }
}

/// RangeScalarShiftImm - Return the maximum shift amount (element bit width
/// minus one) for a scalar shift of the type described by \p typestr after
/// applying the prototype modifier \p mod.
static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
  // base type to get the type string for.
  bool dummy = false;
  char type = ClassifyType(typestr, dummy, dummy, dummy);
  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);

  switch (type) {
  case 'c':
    return 7;
  case 'h':
  case 's':
    return 15;
  case 'f':
  case 'i':
    return 31;
  case 'd':
  case 'l':
    return 63;
  default:
    PrintFatalError("unhandled type!");
  }
}

/// Generate the ARM and AArch64 intrinsic range checking code for
/// shift/lane immediates, checking for unique declarations.
void
NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
                                        StringMap<ClassKind> &A64IntrinsicMap,
                                        bool isA64RangeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the intrinsic range checking code for shift/lane immediates.
  if (isA64RangeCheck)
    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
  else
    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Only plain builtins (OpNone) reach Sema; pseudo-ops are expanded in the
    // header and need no range check here.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    // Rename is the key used to match an ARM record against an AArch64
    // redefinition of the same intrinsic (same name and prototype).
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which do not have an immediate do not need to have range
    // checking code emitted.
    size_t immPos = Proto.find('i');
    if (immPos == std::string::npos)
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (!ProtoHasScalar(Proto))
      ck = ClassB;

    // Do not include AArch64 range checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64RangeCheck && isA64)
      continue;

    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
    // redefined by AArch64 to handle new types.
    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // rangestr accumulates "l = <lower>; u = <upper>" for this case.
      std::string namestr, shiftstr, rangestr;

      if (R->getValueAsBit("isVCVT_N")) {
        // VCVT between floating- and fixed-point values takes an immediate
        // in the range [1, 32] for f32, or [1, 64] for f64.
        ck = ClassB;
        if (name.find("32") != std::string::npos)
          rangestr = "l = 1; u = 31"; // upper bound = l + u
        else if (name.find("64") != std::string::npos)
          rangestr = "l = 1; u = 63";
        else
          PrintFatalError(R->getLoc(),
              "Fixed point convert name should contains \"32\" or \"64\"");

      } else if (R->getValueAsBit("isScalarShift")) {
        // Right shifts have an 'r' in the name, left shifts do not.  Convert
        // instructions have the same bounds as right shifts.
        if (name.find('r') != std::string::npos ||
            name.find("cvt") != std::string::npos)
          rangestr = "l = 1; ";

        unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
        // Narrow shift has half the upper bound
        if (R->getValueAsBit("isScalarNarrowShift"))
          upBound /= 2;

        rangestr += "u = " + utostr(upBound);
      } else if (R->getValueAsBit("isShift")) {
        // Builtins which are overloaded by type will need to have their upper
        // bound computed at Sema time based on the type constant.
        shiftstr = ", true";

        // Right shifts have an 'r' in the name, left shifts do not.
        if (name.find('r') != std::string::npos)
          rangestr = "l = 1; ";

        rangestr += "u = RFT(TV" + shiftstr + ")";
      } else {
        // The immediate generally refers to a lane in the preceding argument.
        assert(immPos > 0 && "unexpected immediate operand");
        rangestr =
            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
      }
      // Make sure cases appear only once by uniquing them in a string map.
      namestr = MangleName(name, TypeVec[ti], ck);
      if (EmittedMap.count(namestr))
        continue;
      EmittedMap[namestr] = OpNone;

      // Calculate the index of the immediate that should be range checked.
      unsigned immidx = 0;

      // Builtins that return a struct of multiple vectors have an extra
      // leading arg for the struct return.
      if (IsMultiVecProto(Proto[0]))
        ++immidx;

      // Add one to the index for each argument until we reach the immediate
      // to be checked.  Structs of vectors are passed as multiple arguments.
      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
        switch (Proto[ii]) {
        default:
          immidx += 1;
          break;
        case '2':
        case 'B':
          immidx += 2;
          break;
        case '3':
        case 'C':
          immidx += 3;
          break;
        case '4':
        case 'D':
          immidx += 4;
          break;
        case 'i':
          // Found the immediate; terminate the loop after this iteration.
          ie = ii + 1;
          break;
        }
      }
      if (isA64RangeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
         << rangestr << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only plain builtins (OpNone) reach Sema; pseudo-ops are expanded in the
    // header and need no overload check here.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    // Rename is the key used to match an ARM record against an AArch64
    // redefinition of the same intrinsic (same name and prototype).
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // mask/qmask collect one bit per valid NeonTypeFlags value for the 64-bit
    // and 128-bit overloads respectively; si/qi remember a representative
    // type index for each, used below only to mangle the case label.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit one case for the 64-bit overloads and one for the 128-bit
    // overloads; ClassB mangling collapses all element types into one name.
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}

/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
/// declaration of builtins, checking for unique builtin declarations.
void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
                                 StringMap<ClassKind> &A64IntrinsicMap,
                                 bool isA64GenBuiltinDef) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  // Tracks every BUILTIN() line already written so duplicates are suppressed.
  StringMap<OpKind> EmittedMap;

  // Generate BuiltinsARM.def and BuiltinsAArch64.def
  if (isA64GenBuiltinDef)
    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
  else
    OS << "#ifdef GET_NEON_BUILTINS\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only intrinsics implemented as a __builtin_ call (Operand == OpNone)
    // need a BUILTIN() entry; the rest lower to generic vector operations.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string name = R->getValueAsString("Name");
    // "name@prototype" uniquely identifies an intrinsic overload; the same
    // key format is used when building A64IntrinsicMap in runHeader().
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    std::string Types = R->getValueAsString("Types");
    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // The class kind (second superclass) determines the builtin's signature
    // mangling in GenBuiltinDef below.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];

    // Do not include AArch64 BUILTIN() macros if not generating
    // code for AArch64
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64GenBuiltinDef && isA64)
      continue;

    // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the declaration for this builtin, ensuring
      // that each unique BUILTIN() macro appears only once in the output
      // stream.
      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
      if (EmittedMap.count(bd))
        continue;

      EmittedMap[bd] = OpNone;
      OS << bd << "\n";
    }
  }
  OS << "#endif\n\n";
}

/// runHeader - Emit a file with sections defining:
/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
/// 2. the SemaChecking code for the type overload checking.
/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
void NeonEmitter::runHeader(raw_ostream &OS) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");

  // Build a map of AArch64 intrinsics to be used in uniqueness checks.
  // Keyed by "name@prototype"; the value is the intrinsic's class kind so
  // the ARM/AArch64 generators can detect redefinitions.
  StringMap<ClassKind> A64IntrinsicMap;
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    // Records without a second superclass get ClassNone rather than a fatal
    // error here; genBuiltinsDef treats ClassNone entries as non-matching.
    ClassKind CK = ClassNone;
    if (R->getSuperClasses().size() >= 2)
      CK = ClassMap[R->getSuperClasses()[1]];

    std::string Name = R->getValueAsString("Name");
    std::string Proto = R->getValueAsString("Prototype");
    std::string Rename = Name + "@" + Proto;
    // First definition wins; later duplicates are ignored.
    if (A64IntrinsicMap.count(Rename))
      continue;
    A64IntrinsicMap[Rename] = CK;
  }

  // Generate BuiltinsARM.def for ARM
  genBuiltinsDef(OS, A64IntrinsicMap, false);

  // Generate BuiltinsAArch64.def for AArch64
  genBuiltinsDef(OS, A64IntrinsicMap, true);

  // Generate ARM overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);

  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);

  // Generate ARM range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);

  // Generate the AArch64 range checking code for shift/lane immediates.
  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
}

/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
3237static std::string GenTest(const std::string &name, 3238 const std::string &proto, 3239 StringRef outTypeStr, StringRef inTypeStr, 3240 bool isShift, bool isHiddenLOp, 3241 ClassKind ck, const std::string &InstName, 3242 bool isA64, 3243 std::string & testFuncProto) { 3244 assert(!proto.empty() && ""); 3245 std::string s; 3246 3247 // Function name with type suffix 3248 std::string mangledName = MangleName(name, outTypeStr, ClassS); 3249 if (outTypeStr != inTypeStr) { 3250 // If the input type is different (e.g., for vreinterpret), append a suffix 3251 // for the input type. String off a "Q" (quad) prefix so that MangleName 3252 // does not insert another "q" in the name. 3253 unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0); 3254 StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff); 3255 mangledName = MangleName(mangledName, inTypeNoQuad, ClassS); 3256 } 3257 3258 // todo: GenerateChecksForIntrinsic does not generate CHECK 3259 // for aarch64 instructions yet 3260 std::vector<std::string> FileCheckPatterns; 3261 if (!isA64) { 3262 GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName, 3263 isHiddenLOp, FileCheckPatterns); 3264 s+= "// CHECK_ARM: test_" + mangledName + "\n"; 3265 } 3266 s += "// CHECK_AARCH64: test_" + mangledName + "\n"; 3267 3268 // Emit the FileCheck patterns. 3269 // If for any reason we do not want to emit a check, mangledInst 3270 // will be the empty string. 3271 if (FileCheckPatterns.size()) { 3272 for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(), 3273 e = FileCheckPatterns.end(); 3274 i != e; 3275 ++i) { 3276 s += "// CHECK_ARM: " + *i + "\n"; 3277 } 3278 } 3279 3280 // Emit the start of the test function. 3281 3282 testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "("; 3283 char arg = 'a'; 3284 std::string comma; 3285 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 3286 // Do not create arguments for values that must be immediate constants. 
3287 if (proto[i] == 'i') 3288 continue; 3289 testFuncProto += comma + TypeString(proto[i], inTypeStr) + " "; 3290 testFuncProto.push_back(arg); 3291 comma = ", "; 3292 } 3293 testFuncProto += ")"; 3294 3295 s+= testFuncProto; 3296 s+= " {\n "; 3297 3298 if (proto[0] != 'v') 3299 s += "return "; 3300 s += mangledName + "("; 3301 arg = 'a'; 3302 for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) { 3303 if (proto[i] == 'i') { 3304 // For immediate operands, test the maximum value. 3305 if (isShift) 3306 s += "1"; // FIXME 3307 else 3308 // The immediate generally refers to a lane in the preceding argument. 3309 s += utostr(RangeFromType(proto[i-1], inTypeStr)); 3310 } else { 3311 s.push_back(arg); 3312 } 3313 if ((i + 1) < e) 3314 s += ", "; 3315 } 3316 s += ");\n}\n\n"; 3317 return s; 3318} 3319 3320/// Write out all intrinsic tests for the specified target, checking 3321/// for intrinsic test uniqueness. 3322void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, 3323 bool isA64GenTest) { 3324 if (isA64GenTest) 3325 OS << "#ifdef __aarch64__\n"; 3326 3327 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); 3328 for (unsigned i = 0, e = RV.size(); i != e; ++i) { 3329 Record *R = RV[i]; 3330 std::string name = R->getValueAsString("Name"); 3331 std::string Proto = R->getValueAsString("Prototype"); 3332 std::string Types = R->getValueAsString("Types"); 3333 bool isShift = R->getValueAsBit("isShift"); 3334 std::string InstName = R->getValueAsString("InstName"); 3335 bool isHiddenLOp = R->getValueAsBit("isHiddenLInst"); 3336 bool isA64 = R->getValueAsBit("isA64"); 3337 3338 // do not include AArch64 intrinsic test if not generating 3339 // code for AArch64 3340 if (!isA64GenTest && isA64) 3341 continue; 3342 3343 SmallVector<StringRef, 16> TypeVec; 3344 ParseTypes(R, Types, TypeVec); 3345 3346 ClassKind ck = ClassMap[R->getSuperClasses()[1]]; 3347 OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()]; 3348 if (kind 
== OpUnavailable) 3349 continue; 3350 for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { 3351 if (kind == OpReinterpret) { 3352 bool outQuad = false; 3353 bool dummy = false; 3354 (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy); 3355 for (unsigned srcti = 0, srcte = TypeVec.size(); 3356 srcti != srcte; ++srcti) { 3357 bool inQuad = false; 3358 (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); 3359 if (srcti == ti || inQuad != outQuad) 3360 continue; 3361 std::string testFuncProto; 3362 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti], 3363 isShift, isHiddenLOp, ck, InstName, isA64, 3364 testFuncProto); 3365 if (EmittedMap.count(testFuncProto)) 3366 continue; 3367 EmittedMap[testFuncProto] = kind; 3368 OS << s << "\n"; 3369 } 3370 } else { 3371 std::string testFuncProto; 3372 std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift, 3373 isHiddenLOp, ck, InstName, isA64, testFuncProto); 3374 if (EmittedMap.count(testFuncProto)) 3375 continue; 3376 EmittedMap[testFuncProto] = kind; 3377 OS << s << "\n"; 3378 } 3379 } 3380 } 3381 3382 if (isA64GenTest) 3383 OS << "#endif\n"; 3384} 3385/// runTests - Write out a complete set of tests for all of the Neon 3386/// intrinsics. 3387void NeonEmitter::runTests(raw_ostream &OS) { 3388 OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi " 3389 "apcs-gnu\\\n" 3390 "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n" 3391 "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n" 3392 "\n" 3393 "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n" 3394 "// RUN -target-feature +neon -ffreestanding -S -o - %s \\\n" 3395 "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n" 3396 "\n" 3397 "// REQUIRES: long_tests\n" 3398 "\n" 3399 "#include <arm_neon.h>\n" 3400 "\n"; 3401 3402 // ARM tests must be emitted before AArch64 tests to ensure 3403 // tests for intrinsics that are common to ARM and AArch64 3404 // appear only once in the output stream. 
3405 // The check for uniqueness is done in genTargetTest. 3406 StringMap<OpKind> EmittedMap; 3407 3408 genTargetTest(OS, EmittedMap, false); 3409 3410 genTargetTest(OS, EmittedMap, true); 3411} 3412 3413namespace clang { 3414void EmitNeon(RecordKeeper &Records, raw_ostream &OS) { 3415 NeonEmitter(Records).run(OS); 3416} 3417void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { 3418 NeonEmitter(Records).runHeader(OS); 3419} 3420void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { 3421 NeonEmitter(Records).runTests(OS); 3422} 3423} // End namespace clang 3424