// NeonEmitter.cpp revision 263508
//===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface.  See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors.  Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.  A complete set of tests
// for Neon intrinsics can be generated by calling the runTests() entry point.
//
//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringExtras.h"
30#include "llvm/ADT/StringMap.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/TableGen/Error.h"
33#include "llvm/TableGen/Record.h"
34#include "llvm/TableGen/TableGenBackend.h"
35#include <string>
36using namespace llvm;
37
/// OpKind - the kinds of operations that intrinsics declared with an OP_*
/// code in arm_neon.td expand to.  Kept in one-to-one correspondence with
/// the OpMap table built in NeonEmitter's constructor below.
enum OpKind {
  OpNone,
  OpUnavailable,
  // Add/subtract (plain, long, wide, and high-half variants).
  OpAdd,
  OpAddl,
  OpAddlHi,
  OpAddw,
  OpAddwHi,
  OpSub,
  OpSubl,
  OpSublHi,
  OpSubw,
  OpSubwHi,
  // Multiply and multiply-accumulate families.
  OpMul,
  OpMla,
  OpMlal,
  OpMullHi,
  OpMullHiN,
  OpMlalHi,
  OpMlalHiN,
  OpMls,
  OpMlsl,
  OpMlslHi,
  OpMlslHiN,
  // *N: multiply by a scalar operand.
  OpMulN,
  OpMlaN,
  OpMlsN,
  OpFMlaN,
  OpFMlsN,
  OpMlalN,
  OpMlslN,
  // *Lane / *LaneQ: multiply by a selected vector lane.
  OpMulLane,
  OpMulXLane,
  OpMullLane,
  OpMullHiLane,
  OpMlaLane,
  OpMlsLane,
  OpMlalLane,
  OpMlalHiLane,
  OpMlslLane,
  OpMlslHiLane,
  OpQDMullLane,
  OpQDMullHiLane,
  OpQDMlalLane,
  OpQDMlalHiLane,
  OpQDMlslLane,
  OpQDMlslHiLane,
  OpQDMulhLane,
  OpQRDMulhLane,
  OpFMSLane,
  OpFMSLaneQ,
  // Element permutations.
  OpTrn1,
  OpZip1,
  OpUzp1,
  OpTrn2,
  OpZip2,
  OpUzp2,
  // Comparisons.
  OpEq,
  OpGe,
  OpLe,
  OpGt,
  OpLt,
  // Unary and bitwise logical operations.
  OpNeg,
  OpNot,
  OpAnd,
  OpOr,
  OpXor,
  OpAndNot,
  OpOrNot,
  // Casting, vector splitting/joining, duplication and selection.
  OpCast,
  OpConcat,
  OpDup,
  OpDupLane,
  OpHi,
  OpLo,
  OpSelect,
  OpRev16,
  OpRev32,
  OpRev64,
  // High-half narrowing/widening conversions.
  OpXtnHi,
  OpSqxtunHi,
  OpQxtnHi,
  OpFcvtnHi,
  OpFcvtlHi,
  OpFcvtxnHi,
  OpReinterpret,
  OpAddhnHi,
  OpRAddhnHi,
  OpSubhnHi,
  OpRSubhnHi,
  // Absolute-difference family.
  OpAbdl,
  OpAbdlHi,
  OpAba,
  OpAbal,
  OpAbalHi,
  // Saturating doubling multiplies on the high half.
  OpQDMullHi,
  OpQDMullHiN,
  OpQDMlalHi,
  OpQDMlalHiN,
  OpQDMlslHi,
  OpQDMlslHiN,
  OpDiv,
  OpLongHi,
  OpNarrowHi,
  OpMovlHi,
  // Lane copy operations.
  OpCopyLane,
  OpCopyQLane,
  OpCopyLaneQ,
  // Scalar-by-lane operations.
  OpScalarMulLane,
  OpScalarMulLaneQ,
  OpScalarMulXLane,
  OpScalarMulXLaneQ,
  OpScalarVMulXLane,
  OpScalarVMulXLaneQ,
  OpScalarQDMullLane,
  OpScalarQDMullLaneQ,
  OpScalarQDMulHiLane,
  OpScalarQDMulHiLaneQ,
  OpScalarQRDMulHiLane,
  OpScalarQRDMulHiLaneQ,
  OpScalarGetLane,
  OpScalarSetLane
};
161
/// ClassKind - the instruction class of an intrinsic, which determines how
/// its name suffix is mangled and how (or whether) tests are emitted for it.
enum ClassKind {
  ClassNone,
  ClassI,           // generic integer instruction, e.g., "i8" suffix
  ClassS,           // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,           // width-specific instruction, e.g., "8" suffix
  ClassB,           // bitcast arguments with enum argument to specify type
  ClassL,           // Logical instructions which are op instructions
                    // but we need to not emit any suffix for in our
                    // tests.
  ClassNoTest       // Instructions which we do not test since they are
                    // not TRUE instructions.
};
174
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins.  These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace {
class NeonTypeFlags {
  // Bit layout of Flags: the low 4 bits hold the EltType code, bit 4 marks
  // an unsigned element type, and bit 5 marks a 128-bit (quad) vector.
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };
  uint32_t Flags;

public:
  // Element-type codes.  The enumerator values are shared with clang's
  // TargetBuiltins.h (see comment above) and must not be reordered.
  enum EltType {
    Int8,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Float16,
    Float32,
    Float64
  };

  // Construct from a raw flag word.
  NeonTypeFlags(unsigned F) : Flags(F) {}
  // Construct from an element type plus the unsigned/quad attributes.
  NeonTypeFlags(EltType ET, bool IsUnsigned, bool IsQuad) : Flags(ET) {
    if (IsUnsigned)
      Flags |= UnsignedFlag;
    if (IsQuad)
      Flags |= QuadFlag;
  }

  // Return the raw flag word, suitable for emission into generated code.
  uint32_t getFlags() const { return Flags; }
};
} // end anonymous namespace
212
namespace {
/// NeonEmitter - the tablegen backend driver.  Its constructor builds two
/// lookup tables from the strings used in arm_neon.td: OpMap (OP_* operation
/// name -> OpKind) and ClassMap (instruction class record -> ClassKind).
/// The run/runHeader/runTests entry points then walk the records and emit
/// arm_neon.h, builtin prototypes, or intrinsic tests respectively.
class NeonEmitter {
  RecordKeeper &Records;
  // Maps an OP_* string from arm_neon.td to the operation kind to emit.
  StringMap<OpKind> OpMap;
  // Maps an instruction-definition class record to its ClassKind.
  DenseMap<Record*, ClassKind> ClassMap;

public:
  NeonEmitter(RecordKeeper &R) : Records(R) {
    // Populate OpMap.  Every OP_* name that can appear in arm_neon.td must
    // have an entry here; the table order mirrors the OpKind enum above.
    OpMap["OP_NONE"]  = OpNone;
    OpMap["OP_UNAVAILABLE"] = OpUnavailable;
    OpMap["OP_ADD"]   = OpAdd;
    OpMap["OP_ADDL"]  = OpAddl;
    OpMap["OP_ADDLHi"] = OpAddlHi;
    OpMap["OP_ADDW"]  = OpAddw;
    OpMap["OP_ADDWHi"] = OpAddwHi;
    OpMap["OP_SUB"]   = OpSub;
    OpMap["OP_SUBL"]  = OpSubl;
    OpMap["OP_SUBLHi"] = OpSublHi;
    OpMap["OP_SUBW"]  = OpSubw;
    OpMap["OP_SUBWHi"] = OpSubwHi;
    OpMap["OP_MUL"]   = OpMul;
    OpMap["OP_MLA"]   = OpMla;
    OpMap["OP_MLAL"]  = OpMlal;
    OpMap["OP_MULLHi"]  = OpMullHi;
    OpMap["OP_MULLHi_N"]  = OpMullHiN;
    OpMap["OP_MLALHi"]  = OpMlalHi;
    OpMap["OP_MLALHi_N"]  = OpMlalHiN;
    OpMap["OP_MLS"]   = OpMls;
    OpMap["OP_MLSL"]  = OpMlsl;
    OpMap["OP_MLSLHi"] = OpMlslHi;
    OpMap["OP_MLSLHi_N"] = OpMlslHiN;
    OpMap["OP_MUL_N"] = OpMulN;
    OpMap["OP_MLA_N"] = OpMlaN;
    OpMap["OP_MLS_N"] = OpMlsN;
    OpMap["OP_FMLA_N"] = OpFMlaN;
    OpMap["OP_FMLS_N"] = OpFMlsN;
    OpMap["OP_MLAL_N"] = OpMlalN;
    OpMap["OP_MLSL_N"] = OpMlslN;
    OpMap["OP_MUL_LN"]= OpMulLane;
    OpMap["OP_MULX_LN"]= OpMulXLane;
    OpMap["OP_MULL_LN"] = OpMullLane;
    OpMap["OP_MULLHi_LN"] = OpMullHiLane;
    OpMap["OP_MLA_LN"]= OpMlaLane;
    OpMap["OP_MLS_LN"]= OpMlsLane;
    OpMap["OP_MLAL_LN"] = OpMlalLane;
    OpMap["OP_MLALHi_LN"] = OpMlalHiLane;
    OpMap["OP_MLSL_LN"] = OpMlslLane;
    OpMap["OP_MLSLHi_LN"] = OpMlslHiLane;
    OpMap["OP_QDMULL_LN"] = OpQDMullLane;
    OpMap["OP_QDMULLHi_LN"] = OpQDMullHiLane;
    OpMap["OP_QDMLAL_LN"] = OpQDMlalLane;
    OpMap["OP_QDMLALHi_LN"] = OpQDMlalHiLane;
    OpMap["OP_QDMLSL_LN"] = OpQDMlslLane;
    OpMap["OP_QDMLSLHi_LN"] = OpQDMlslHiLane;
    OpMap["OP_QDMULH_LN"] = OpQDMulhLane;
    OpMap["OP_QRDMULH_LN"] = OpQRDMulhLane;
    OpMap["OP_FMS_LN"] = OpFMSLane;
    OpMap["OP_FMS_LNQ"] = OpFMSLaneQ;
    OpMap["OP_TRN1"]  = OpTrn1;
    OpMap["OP_ZIP1"]  = OpZip1;
    OpMap["OP_UZP1"]  = OpUzp1;
    OpMap["OP_TRN2"]  = OpTrn2;
    OpMap["OP_ZIP2"]  = OpZip2;
    OpMap["OP_UZP2"]  = OpUzp2;
    OpMap["OP_EQ"]    = OpEq;
    OpMap["OP_GE"]    = OpGe;
    OpMap["OP_LE"]    = OpLe;
    OpMap["OP_GT"]    = OpGt;
    OpMap["OP_LT"]    = OpLt;
    OpMap["OP_NEG"]   = OpNeg;
    OpMap["OP_NOT"]   = OpNot;
    OpMap["OP_AND"]   = OpAnd;
    OpMap["OP_OR"]    = OpOr;
    OpMap["OP_XOR"]   = OpXor;
    OpMap["OP_ANDN"]  = OpAndNot;
    OpMap["OP_ORN"]   = OpOrNot;
    OpMap["OP_CAST"]  = OpCast;
    OpMap["OP_CONC"]  = OpConcat;
    OpMap["OP_HI"]    = OpHi;
    OpMap["OP_LO"]    = OpLo;
    OpMap["OP_DUP"]   = OpDup;
    OpMap["OP_DUP_LN"] = OpDupLane;
    OpMap["OP_SEL"]   = OpSelect;
    OpMap["OP_REV16"] = OpRev16;
    OpMap["OP_REV32"] = OpRev32;
    OpMap["OP_REV64"] = OpRev64;
    OpMap["OP_XTN"] = OpXtnHi;
    OpMap["OP_SQXTUN"] = OpSqxtunHi;
    OpMap["OP_QXTN"] = OpQxtnHi;
    OpMap["OP_VCVT_NA_HI"] = OpFcvtnHi;
    OpMap["OP_VCVT_EX_HI"] = OpFcvtlHi;
    OpMap["OP_VCVTX_HI"] = OpFcvtxnHi;
    OpMap["OP_REINT"] = OpReinterpret;
    OpMap["OP_ADDHNHi"] = OpAddhnHi;
    OpMap["OP_RADDHNHi"] = OpRAddhnHi;
    OpMap["OP_SUBHNHi"] = OpSubhnHi;
    OpMap["OP_RSUBHNHi"] = OpRSubhnHi;
    OpMap["OP_ABDL"]  = OpAbdl;
    OpMap["OP_ABDLHi"] = OpAbdlHi;
    OpMap["OP_ABA"]   = OpAba;
    OpMap["OP_ABAL"]  = OpAbal;
    OpMap["OP_ABALHi"] = OpAbalHi;
    OpMap["OP_QDMULLHi"] = OpQDMullHi;
    OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
    OpMap["OP_QDMLALHi"] = OpQDMlalHi;
    OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
    OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
    OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
    OpMap["OP_DIV"] = OpDiv;
    OpMap["OP_LONG_HI"] = OpLongHi;
    OpMap["OP_NARROW_HI"] = OpNarrowHi;
    OpMap["OP_MOVL_HI"] = OpMovlHi;
    OpMap["OP_COPY_LN"] = OpCopyLane;
    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
    OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
    OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
    OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
    OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
    OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
    OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
    OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
    OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
    OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
    OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
    OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
    OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
    OpMap["OP_SCALAR_GET_LN"] = OpScalarGetLane;
    OpMap["OP_SCALAR_SET_LN"] = OpScalarSetLane;

    // Map the instruction-definition classes declared in arm_neon.td to the
    // ClassKind used for suffix mangling and test emission.
    Record *SI = R.getClass("SInst");
    Record *II = R.getClass("IInst");
    Record *WI = R.getClass("WInst");
    Record *SOpI = R.getClass("SOpInst");
    Record *IOpI = R.getClass("IOpInst");
    Record *WOpI = R.getClass("WOpInst");
    Record *LOpI = R.getClass("LOpInst");
    Record *NoTestOpI = R.getClass("NoTestOpInst");

    ClassMap[SI] = ClassS;
    ClassMap[II] = ClassI;
    ClassMap[WI] = ClassW;
    ClassMap[SOpI] = ClassS;
    ClassMap[IOpI] = ClassI;
    ClassMap[WOpI] = ClassW;
    ClassMap[LOpI] = ClassL;
    ClassMap[NoTestOpI] = ClassNoTest;
  }

  // run - Emit arm_neon.h.inc
  void run(raw_ostream &o);

  // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  void runHeader(raw_ostream &o);

  // runTests - Emit tests for all the Neon intrinsics.
  void runTests(raw_ostream &o);

private:
  // emitIntrinsic - emit a single intrinsic record, recording what was
  // emitted in EmittedMap to avoid duplicates.
  void emitIntrinsic(raw_ostream &OS, Record *R,
                     StringMap<ClassKind> &EmittedMap);
  // The isA64* flags select between AArch64-specific and common emission.
  void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
                      bool isA64GenBuiltinDef);
  void genOverloadTypeCheckCode(raw_ostream &OS,
                                StringMap<ClassKind> &A64IntrinsicMap,
                                bool isA64TypeCheck);
  void genIntrinsicRangeCheckCode(raw_ostream &OS,
                                  StringMap<ClassKind> &A64IntrinsicMap,
                                  bool isA64RangeCheck);
  void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
                     bool isA64TestGen);
};
} // end anonymous namespace
386
387/// ParseTypes - break down a string such as "fQf" into a vector of StringRefs,
388/// which each StringRef representing a single type declared in the string.
389/// for "fQf" we would end up with 2 StringRefs, "f", and "Qf", representing
390/// 2xfloat and 4xfloat respectively.
391static void ParseTypes(Record *r, std::string &s,
392                       SmallVectorImpl<StringRef> &TV) {
393  const char *data = s.data();
394  int len = 0;
395
396  for (unsigned i = 0, e = s.size(); i != e; ++i, ++len) {
397    if (data[len] == 'P' || data[len] == 'Q' || data[len] == 'U'
398                         || data[len] == 'H' || data[len] == 'S')
399      continue;
400
401    switch (data[len]) {
402      case 'c':
403      case 's':
404      case 'i':
405      case 'l':
406      case 'h':
407      case 'f':
408      case 'd':
409        break;
410      default:
411        PrintFatalError(r->getLoc(),
412                      "Unexpected letter: " + std::string(data + len, 1));
413    }
414    TV.push_back(StringRef(data, len + 1));
415    data += len + 1;
416    len = -1;
417  }
418}
419
420/// Widen - Convert a type code into the next wider type.  char -> short,
421/// short -> int, etc.
422static char Widen(const char t) {
423  switch (t) {
424    case 'c':
425      return 's';
426    case 's':
427      return 'i';
428    case 'i':
429      return 'l';
430    case 'h':
431      return 'f';
432    case 'f':
433      return 'd';
434    default:
435      PrintFatalError("unhandled type in widen!");
436  }
437}
438
439/// Narrow - Convert a type code into the next smaller type.  short -> char,
440/// float -> half float, etc.
441static char Narrow(const char t) {
442  switch (t) {
443    case 's':
444      return 'c';
445    case 'i':
446      return 's';
447    case 'l':
448      return 'i';
449    case 'f':
450      return 'h';
451    case 'd':
452      return 'f';
453    default:
454      PrintFatalError("unhandled type in narrow!");
455  }
456}
457
458static std::string GetNarrowTypestr(StringRef ty)
459{
460  std::string s;
461  for (size_t i = 0, end = ty.size(); i < end; i++) {
462    switch (ty[i]) {
463      case 's':
464        s += 'c';
465        break;
466      case 'i':
467        s += 's';
468        break;
469      case 'l':
470        s += 'i';
471        break;
472      default:
473        s += ty[i];
474        break;
475    }
476  }
477
478  return s;
479}
480
481/// For a particular StringRef, return the base type code, and whether it has
482/// the quad-vector, polynomial, or unsigned modifiers set.
483static char ClassifyType(StringRef ty, bool &quad, bool &poly, bool &usgn) {
484  unsigned off = 0;
485  // ignore scalar.
486  if (ty[off] == 'S') {
487    ++off;
488  }
489  // remember quad.
490  if (ty[off] == 'Q' || ty[off] == 'H') {
491    quad = true;
492    ++off;
493  }
494
495  // remember poly.
496  if (ty[off] == 'P') {
497    poly = true;
498    ++off;
499  }
500
501  // remember unsigned.
502  if (ty[off] == 'U') {
503    usgn = true;
504    ++off;
505  }
506
507  // base type to get the type string for.
508  return ty[off];
509}
510
/// ModType - Transform a type code and its modifiers based on a mod code. The
/// mod code definitions may be found at the top of arm_neon.td.  Returns the
/// (possibly changed) base type letter and updates the flag outputs in place.
/// Several cases fall through deliberately: 'b'->'u', '$'->'x', 'y'->'f' and
/// 'c'->'p' are the scalar/const variants of the case they fall into.
static char ModType(const char mod, char type, bool &quad, bool &poly,
                    bool &usgn, bool &scal, bool &cnst, bool &pntr) {
  switch (mod) {
    case 't':
      // Poly types become their unsigned-integer equivalent.
      if (poly) {
        poly = false;
        usgn = true;
      }
      break;
    case 'b':
      scal = true;
      // FALLTHROUGH: 'b' is the scalar form of 'u'.
    case 'u':
      // Force an unsigned integer of the same width.
      usgn = true;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case '$':
      scal = true;
      // FALLTHROUGH: '$' is the scalar form of 'x'.
    case 'x':
      // Force a signed integer of the same width.
      usgn = false;
      poly = false;
      if (type == 'f')
        type = 'i';
      if (type == 'd')
        type = 'l';
      break;
    case 'o':
      // Scalar double.
      scal = true;
      type = 'd';
      usgn = false;
      break;
    case 'y':
      scal = true;
      // FALLTHROUGH: 'y' is the scalar form of 'f'.
    case 'f':
      // Force float; a half-float input becomes a quad to keep lane count.
      if (type == 'h')
        quad = true;
      type = 'f';
      usgn = false;
      break;
    case 'F':
      // Force double.
      type = 'd';
      usgn = false;
      break;
    case 'g':
      // Force the 64-bit (non-quad) vector form.
      quad = false;
      break;
    case 'B':
    case 'C':
    case 'D':
    case 'j':
      // Force the 128-bit (quad) vector form.
      quad = true;
      break;
    case 'w':
      // Widen the element and force quad.
      type = Widen(type);
      quad = true;
      break;
    case 'n':
      // Widen the element only.
      type = Widen(type);
      break;
    case 'i':
      // Scalar int.
      type = 'i';
      scal = true;
      break;
    case 'l':
      // Scalar unsigned 64-bit integer.
      type = 'l';
      scal = true;
      usgn = true;
      break;
    case 'z':
      // Narrowed scalar.
      type = Narrow(type);
      scal = true;
      break;
    case 'r':
      // Widened scalar.
      type = Widen(type);
      scal = true;
      break;
    case 's':
    case 'a':
      scal = true;
      break;
    case 'k':
      quad = true;
      break;
    case 'c':
      cnst = true;
      // FALLTHROUGH: a const pointer is still a pointer.
    case 'p':
      pntr = true;
      scal = true;
      break;
    case 'h':
      // Narrow the element; narrowing to half-float drops back to 64-bit.
      type = Narrow(type);
      if (type == 'h')
        quad = false;
      break;
    case 'q':
      // Narrow the element but keep the quad register.
      type = Narrow(type);
      quad = true;
      break;
    case 'e':
      // Narrow the element and force unsigned.
      type = Narrow(type);
      usgn = true;
      break;
    case 'm':
      // Narrow the element and force the 64-bit vector form.
      type = Narrow(type);
      quad = false;
      break;
    default:
      break;
  }
  return type;
}
627
/// IsMultiVecProto - true if the prototype modifier denotes an array of 2-4
/// vectors: '2'..'4' (by value) or their 'B'..'D' quad-forced counterparts.
static bool IsMultiVecProto(const char p) {
  if (p >= '2' && p <= '4')
    return true;
  return p >= 'B' && p <= 'D';
}
631
632/// TypeString - for a modifier and type, generate the name of the typedef for
633/// that type.  QUc -> uint8x8_t.
634static std::string TypeString(const char mod, StringRef typestr) {
635  bool quad = false;
636  bool poly = false;
637  bool usgn = false;
638  bool scal = false;
639  bool cnst = false;
640  bool pntr = false;
641
642  if (mod == 'v')
643    return "void";
644  if (mod == 'i')
645    return "int";
646
647  // base type to get the type string for.
648  char type = ClassifyType(typestr, quad, poly, usgn);
649
650  // Based on the modifying character, change the type and width if necessary.
651  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
652
653  SmallString<128> s;
654
655  if (usgn)
656    s.push_back('u');
657
658  switch (type) {
659    case 'c':
660      s += poly ? "poly8" : "int8";
661      if (scal)
662        break;
663      s += quad ? "x16" : "x8";
664      break;
665    case 's':
666      s += poly ? "poly16" : "int16";
667      if (scal)
668        break;
669      s += quad ? "x8" : "x4";
670      break;
671    case 'i':
672      s += "int32";
673      if (scal)
674        break;
675      s += quad ? "x4" : "x2";
676      break;
677    case 'l':
678      s += (poly && !usgn)? "poly64" : "int64";
679      if (scal)
680        break;
681      s += quad ? "x2" : "x1";
682      break;
683    case 'h':
684      s += "float16";
685      if (scal)
686        break;
687      s += quad ? "x8" : "x4";
688      break;
689    case 'f':
690      s += "float32";
691      if (scal)
692        break;
693      s += quad ? "x4" : "x2";
694      break;
695    case 'd':
696      s += "float64";
697      if (scal)
698        break;
699      s += quad ? "x2" : "x1";
700      break;
701
702    default:
703      PrintFatalError("unhandled type!");
704  }
705
706  if (mod == '2' || mod == 'B')
707    s += "x2";
708  if (mod == '3' || mod == 'C')
709    s += "x3";
710  if (mod == '4' || mod == 'D')
711    s += "x4";
712
713  // Append _t, finishing the type string typedef type.
714  s += "_t";
715
716  if (cnst)
717    s += " const";
718
719  if (pntr)
720    s += " *";
721
722  return s.str();
723}
724
725/// BuiltinTypeString - for a modifier and type, generate the clang
726/// BuiltinsARM.def prototype code for the function.  See the top of clang's
727/// Builtins.def for a description of the type strings.
728static std::string BuiltinTypeString(const char mod, StringRef typestr,
729                                     ClassKind ck, bool ret) {
730  bool quad = false;
731  bool poly = false;
732  bool usgn = false;
733  bool scal = false;
734  bool cnst = false;
735  bool pntr = false;
736
737  if (mod == 'v')
738    return "v"; // void
739  if (mod == 'i')
740    return "i"; // int
741
742  // base type to get the type string for.
743  char type = ClassifyType(typestr, quad, poly, usgn);
744
745  // Based on the modifying character, change the type and width if necessary.
746  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
747
748  // All pointers are void* pointers.  Change type to 'v' now.
749  if (pntr) {
750    usgn = false;
751    poly = false;
752    type = 'v';
753  }
754  // Treat half-float ('h') types as unsigned short ('s') types.
755  if (type == 'h') {
756    type = 's';
757    usgn = true;
758  }
759  usgn = usgn | poly | ((ck == ClassI || ck == ClassW) &&
760                         scal && type != 'f' && type != 'd');
761
762  if (scal) {
763    SmallString<128> s;
764
765    if (usgn)
766      s.push_back('U');
767    else if (type == 'c')
768      s.push_back('S'); // make chars explicitly signed
769
770    if (type == 'l') // 64-bit long
771      s += "LLi";
772    else
773      s.push_back(type);
774
775    if (cnst)
776      s.push_back('C');
777    if (pntr)
778      s.push_back('*');
779    return s.str();
780  }
781
782  // Since the return value must be one type, return a vector type of the
783  // appropriate width which we will bitcast.  An exception is made for
784  // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
785  // fashion, storing them to a pointer arg.
786  if (ret) {
787    if (IsMultiVecProto(mod))
788      return "vv*"; // void result with void* first argument
789    if (mod == 'f' || (ck != ClassB && type == 'f'))
790      return quad ? "V4f" : "V2f";
791    if (mod == 'F' || (ck != ClassB && type == 'd'))
792      return quad ? "V2d" : "V1d";
793    if (ck != ClassB && type == 's')
794      return quad ? "V8s" : "V4s";
795    if (ck != ClassB && type == 'i')
796      return quad ? "V4i" : "V2i";
797    if (ck != ClassB && type == 'l')
798      return quad ? "V2LLi" : "V1LLi";
799
800    return quad ? "V16Sc" : "V8Sc";
801  }
802
803  // Non-return array types are passed as individual vectors.
804  if (mod == '2' || mod == 'B')
805    return quad ? "V16ScV16Sc" : "V8ScV8Sc";
806  if (mod == '3' || mod == 'C')
807    return quad ? "V16ScV16ScV16Sc" : "V8ScV8ScV8Sc";
808  if (mod == '4' || mod == 'D')
809    return quad ? "V16ScV16ScV16ScV16Sc" : "V8ScV8ScV8ScV8Sc";
810
811  if (mod == 'f' || (ck != ClassB && type == 'f'))
812    return quad ? "V4f" : "V2f";
813  if (mod == 'F' || (ck != ClassB && type == 'd'))
814    return quad ? "V2d" : "V1d";
815  if (ck != ClassB && type == 's')
816    return quad ? "V8s" : "V4s";
817  if (ck != ClassB && type == 'i')
818    return quad ? "V4i" : "V2i";
819  if (ck != ClassB && type == 'l')
820    return quad ? "V2LLi" : "V1LLi";
821
822  return quad ? "V16Sc" : "V8Sc";
823}
824
/// InstructionTypeCode - Computes the ARM argument character code and
/// quad status for a specific type string and ClassKind.
/// The outer switch selects on the base element type; the inner switch on
/// the class kind, which decides between a full signedness suffix such as
/// "s8"/"u8"/"p8" (ClassS), a generic "i8" (ClassI), or the bare bit width
/// "8" (ClassW).  Other class kinds leave typeCode untouched.
static void InstructionTypeCode(const StringRef &typeStr,
                                const ClassKind ck,
                                bool &quad,
                                std::string &typeCode) {
  bool poly = false;
  bool usgn = false;
  char type = ClassifyType(typeStr, quad, poly, usgn);

  switch (type) {
  case 'c':
    switch (ck) {
    case ClassS: typeCode = poly ? "p8" : usgn ? "u8" : "s8"; break;
    case ClassI: typeCode = "i8"; break;
    case ClassW: typeCode = "8"; break;
    default: break;
    }
    break;
  case 's':
    switch (ck) {
    case ClassS: typeCode = poly ? "p16" : usgn ? "u16" : "s16"; break;
    case ClassI: typeCode = "i16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'i':
    switch (ck) {
    case ClassS: typeCode = usgn ? "u32" : "s32"; break;
    case ClassI: typeCode = "i32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'l':
    switch (ck) {
    case ClassS: typeCode = poly ? "p64" : usgn ? "u64" : "s64"; break;
    case ClassI: typeCode = "i64"; break;
    case ClassW: typeCode = "64"; break;
    default: break;
    }
    break;
  case 'h':
    // Floating-point types carry no signedness suffix: ClassS and ClassI
    // produce the same "fNN" code.
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f16"; break;
    case ClassW: typeCode = "16"; break;
    default: break;
    }
    break;
  case 'f':
    switch (ck) {
    case ClassS:
    case ClassI: typeCode = "f32"; break;
    case ClassW: typeCode = "32"; break;
    default: break;
    }
    break;
  case 'd':
    switch (ck) {
    case ClassS:
    case ClassI:
      typeCode += "f64";
      break;
    case ClassW:
      // float64 has no width-only form.
      PrintFatalError("unhandled type!");
    default:
      break;
    }
    break;
  default:
    PrintFatalError("unhandled type!");
  }
}
900
901static char Insert_BHSD_Suffix(StringRef typestr){
902  unsigned off = 0;
903  if(typestr[off++] == 'S'){
904    while(typestr[off] == 'Q' || typestr[off] == 'H'||
905          typestr[off] == 'P' || typestr[off] == 'U')
906      ++off;
907    switch (typestr[off]){
908    default  : break;
909    case 'c' : return 'b';
910    case 's' : return 'h';
911    case 'i' :
912    case 'f' : return 's';
913    case 'l' :
914    case 'd' : return 'd';
915    }
916  }
917  return 0;
918}
919
/// endsWith_xN - true if the intrinsic name ends in "_x2", "_x3" or "_x4"
/// and has at least one character before that suffix.
static bool endsWith_xN(std::string const &name) {
  // Require something before the 3-character suffix itself.
  if (name.length() <= 3)
    return false;
  const std::string Suffix = name.substr(name.length() - 3);
  return Suffix == "_x2" || Suffix == "_x3" || Suffix == "_x4";
}
929
/// MangleName - Append a type or width suffix to a base neon function name,
/// and insert a 'q' in the appropriate location if type string starts with 'Q'.
/// E.g. turn "vst2_lane" into "vst2q_lane_f32", etc.
/// Insert proper 'b' 'h' 's' 'd' if prefix 'S' is used.
static std::string MangleName(const std::string &name, StringRef typestr,
                              ClassKind ck) {
  // These cross-type conversions already carry both types in their base
  // names and are never mangled.
  if (name == "vcvt_f32_f16" || name == "vcvt_f32_f64" ||
      name == "vcvt_f64_f32")
    return name;

  bool quad = false;
  std::string typeCode = "";

  // typeCode becomes e.g. "s8"/"u16"/"f32"; it may remain empty for class
  // kinds that take no suffix (see InstructionTypeCode).
  InstructionTypeCode(typestr, ck, quad, typeCode);

  std::string s = name;

  if (typeCode.size() > 0) {
    // If the name is end with _xN (N = 2,3,4), insert the typeCode before _xN.
    if (endsWith_xN(s))
      s.insert(s.length() - 3, "_" + typeCode);
    else
      s += "_" + typeCode;
  }

  if (ck == ClassB)
    s += "_v";

  // Insert a 'q' before the first '_' character so that it ends up before
  // _lane or _n on vector-scalar operations.
  // NOTE(review): both inserts below assume s contains a '_' by this point
  // (find() would return npos otherwise and insert would throw) — confirm
  // that no suffix-less quad/scalar intrinsic can reach here.
  if (typestr.find("Q") != StringRef::npos) {
      size_t pos = s.find('_');
      s = s.insert(pos, "q");
  }
  char ins = Insert_BHSD_Suffix(typestr);
  if(ins){
    size_t pos = s.find('_');
    s = s.insert(pos, &ins, 1);
  }

  return s;
}
972
973static void PreprocessInstruction(const StringRef &Name,
974                                  const std::string &InstName,
975                                  std::string &Prefix,
976                                  bool &HasNPostfix,
977                                  bool &HasLanePostfix,
978                                  bool &HasDupPostfix,
979                                  bool &IsSpecialVCvt,
980                                  size_t &TBNumber) {
981  // All of our instruction name fields from arm_neon.td are of the form
982  //   <instructionname>_...
983  // Thus we grab our instruction name via computation of said Prefix.
984  const size_t PrefixEnd = Name.find_first_of('_');
985  // If InstName is passed in, we use that instead of our name Prefix.
986  Prefix = InstName.size() == 0? Name.slice(0, PrefixEnd).str() : InstName;
987
988  const StringRef Postfix = Name.slice(PrefixEnd, Name.size());
989
990  HasNPostfix = Postfix.count("_n");
991  HasLanePostfix = Postfix.count("_lane");
992  HasDupPostfix = Postfix.count("_dup");
993  IsSpecialVCvt = Postfix.size() != 0 && Name.count("vcvt");
994
995  if (InstName.compare("vtbl") == 0 ||
996      InstName.compare("vtbx") == 0) {
997    // If we have a vtblN/vtbxN instruction, use the instruction's ASCII
998    // encoding to get its true value.
999    TBNumber = Name[Name.size()-1] - 48;
1000  }
1001}
1002
1003/// GenerateRegisterCheckPatternsForLoadStores - Given a bunch of data we have
1004/// extracted, generate a FileCheck pattern for a Load Or Store
1005static void
1006GenerateRegisterCheckPatternForLoadStores(const StringRef &NameRef,
1007                                          const std::string& OutTypeCode,
1008                                          const bool &IsQuad,
1009                                          const bool &HasDupPostfix,
1010                                          const bool &HasLanePostfix,
1011                                          const size_t Count,
1012                                          std::string &RegisterSuffix) {
1013  const bool IsLDSTOne = NameRef.count("vld1") || NameRef.count("vst1");
1014  // If N == 3 || N == 4 and we are dealing with a quad instruction, Clang
1015  // will output a series of v{ld,st}1s, so we have to handle it specially.
1016  if ((Count == 3 || Count == 4) && IsQuad) {
1017    RegisterSuffix += "{";
1018    for (size_t i = 0; i < Count; i++) {
1019      RegisterSuffix += "d{{[0-9]+}}";
1020      if (HasDupPostfix) {
1021        RegisterSuffix += "[]";
1022      }
1023      if (HasLanePostfix) {
1024        RegisterSuffix += "[{{[0-9]+}}]";
1025      }
1026      if (i < Count-1) {
1027        RegisterSuffix += ", ";
1028      }
1029    }
1030    RegisterSuffix += "}";
1031  } else {
1032
1033    // Handle normal loads and stores.
1034    RegisterSuffix += "{";
1035    for (size_t i = 0; i < Count; i++) {
1036      RegisterSuffix += "d{{[0-9]+}}";
1037      if (HasDupPostfix) {
1038        RegisterSuffix += "[]";
1039      }
1040      if (HasLanePostfix) {
1041        RegisterSuffix += "[{{[0-9]+}}]";
1042      }
1043      if (IsQuad && !HasLanePostfix) {
1044        RegisterSuffix += ", d{{[0-9]+}}";
1045        if (HasDupPostfix) {
1046          RegisterSuffix += "[]";
1047        }
1048      }
1049      if (i < Count-1) {
1050        RegisterSuffix += ", ";
1051      }
1052    }
1053    RegisterSuffix += "}, [r{{[0-9]+}}";
1054
1055    // We only include the alignment hint if we have a vld1.*64 or
1056    // a dup/lane instruction.
1057    if (IsLDSTOne) {
1058      if ((HasLanePostfix || HasDupPostfix) && OutTypeCode != "8") {
1059        RegisterSuffix += ":" + OutTypeCode;
1060      }
1061    }
1062
1063    RegisterSuffix += "]";
1064  }
1065}
1066
1067static bool HasNPostfixAndScalarArgs(const StringRef &NameRef,
1068                                     const bool &HasNPostfix) {
1069  return (NameRef.count("vmla") ||
1070          NameRef.count("vmlal") ||
1071          NameRef.count("vmlsl") ||
1072          NameRef.count("vmull") ||
1073          NameRef.count("vqdmlal") ||
1074          NameRef.count("vqdmlsl") ||
1075          NameRef.count("vqdmulh") ||
1076          NameRef.count("vqdmull") ||
1077          NameRef.count("vqrdmulh")) && HasNPostfix;
1078}
1079
1080static bool IsFiveOperandLaneAccumulator(const StringRef &NameRef,
1081                                         const bool &HasLanePostfix) {
1082  return (NameRef.count("vmla") ||
1083          NameRef.count("vmls") ||
1084          NameRef.count("vmlal") ||
1085          NameRef.count("vmlsl") ||
1086          (NameRef.count("vmul") && NameRef.size() == 3)||
1087          NameRef.count("vqdmlal") ||
1088          NameRef.count("vqdmlsl") ||
1089          NameRef.count("vqdmulh") ||
1090          NameRef.count("vqrdmulh")) && HasLanePostfix;
1091}
1092
1093static bool IsSpecialLaneMultiply(const StringRef &NameRef,
1094                                  const bool &HasLanePostfix,
1095                                  const bool &IsQuad) {
1096  const bool IsVMulOrMulh = (NameRef.count("vmul") || NameRef.count("mulh"))
1097                               && IsQuad;
1098  const bool IsVMull = NameRef.count("mull") && !IsQuad;
1099  return (IsVMulOrMulh || IsVMull) && HasLanePostfix;
1100}
1101
// NormalizeProtoForRegisterPatternCreation - Map each character of Proto onto
// the small alphabet that GenerateRegisterCheckPattern understands:
//   'q' = quad register, 'd' = double register, 'i' = immediate,
//   'a' = lane-indexed scalar.
// The normalized string is accumulated into NormedProto and then trimmed for
// a handful of instruction-specific special cases below.
// NOTE(review): HasDupPostfix is never read in this function; it appears to
// be accepted only for signature symmetry with the caller.
static void NormalizeProtoForRegisterPatternCreation(const std::string &Name,
                                                     const std::string &Proto,
                                                     const bool &HasNPostfix,
                                                     const bool &IsQuad,
                                                     const bool &HasLanePostfix,
                                                     const bool &HasDupPostfix,
                                                     std::string &NormedProto) {
  // Handle generic case.
  const StringRef NameRef(Name);
  for (size_t i = 0, end = Proto.size(); i < end; i++) {
    switch (Proto[i]) {
    case 'u':
    case 'f':
    case 'F':
    case 'd':
    case 's':
    case 'x':
    case 't':
    case 'n':
      // Plain vector operands: quad or double register depending on IsQuad.
      NormedProto += IsQuad? 'q' : 'd';
      break;
    case 'w':
    case 'k':
      // Always a quad register.
      NormedProto += 'q';
      break;
    case 'g':
    case 'j':
    case 'h':
    case 'e':
      // Always a double register.
      NormedProto += 'd';
      break;
    case 'i':
      // Immediates become a lane reference when a "_lane" postfix is present.
      NormedProto += HasLanePostfix? 'a' : 'i';
      break;
    case 'a':
      if (HasLanePostfix) {
        NormedProto += 'a';
      } else if (HasNPostfixAndScalarArgs(NameRef, HasNPostfix)) {
        // Scalar args of "_n" multiplies live in a vector register.
        NormedProto += IsQuad? 'q' : 'd';
      } else {
        NormedProto += 'i';
      }
      break;
    }
  }

  // Handle Special Cases.
  const bool IsNotVExt = !NameRef.count("vext");
  const bool IsVPADAL = NameRef.count("vpadal");
  const bool Is5OpLaneAccum = IsFiveOperandLaneAccumulator(NameRef,
                                                           HasLanePostfix);
  const bool IsSpecialLaneMul = IsSpecialLaneMultiply(NameRef, HasLanePostfix,
                                                      IsQuad);

  if (IsSpecialLaneMul) {
    // For special lane multiplies, drop the third character: copy character 3
    // over character 2 and erase the tail, keeping characters 0, 1 and 3.
    NormedProto[2] = NormedProto[3];
    NormedProto.erase(3);
  } else if (NormedProto.size() == 4 &&
             NormedProto[0] == NormedProto[1] &&
             IsNotVExt) {
    // If NormedProto.size() == 4 and the first two proto characters are the
    // same, ignore the first.
    NormedProto = NormedProto.substr(1, 3);
  } else if (Is5OpLaneAccum) {
    // If we have a 5 op lane accumulator operation, we take characters 1,2,4
    std::string tmp = NormedProto.substr(1,2);
    tmp += NormedProto[4];
    NormedProto = tmp;
  } else if (IsVPADAL) {
    // If we have VPADAL, keep only the first two characters.
    NormedProto = NormedProto.substr(0, 2);
  } else if (NameRef.count("vdup") && NormedProto.size() > 2) {
    // If our instruction is a dup instruction, keep only the first and
    // last characters.
    std::string tmp = "";
    tmp += NormedProto[0];
    tmp += NormedProto[NormedProto.size()-1];
    NormedProto = tmp;
  }
}
1183
1184/// GenerateRegisterCheckPatterns - Given a bunch of data we have
1185/// extracted, generate a FileCheck pattern to check that an
1186/// instruction's arguments are correct.
1187static void GenerateRegisterCheckPattern(const std::string &Name,
1188                                         const std::string &Proto,
1189                                         const std::string &OutTypeCode,
1190                                         const bool &HasNPostfix,
1191                                         const bool &IsQuad,
1192                                         const bool &HasLanePostfix,
1193                                         const bool &HasDupPostfix,
1194                                         const size_t &TBNumber,
1195                                         std::string &RegisterSuffix) {
1196
1197  RegisterSuffix = "";
1198
1199  const StringRef NameRef(Name);
1200  const StringRef ProtoRef(Proto);
1201
1202  if ((NameRef.count("vdup") || NameRef.count("vmov")) && HasNPostfix) {
1203    return;
1204  }
1205
1206  const bool IsLoadStore = NameRef.count("vld") || NameRef.count("vst");
1207  const bool IsTBXOrTBL = NameRef.count("vtbl") || NameRef.count("vtbx");
1208
1209  if (IsLoadStore) {
1210    // Grab N value from  v{ld,st}N using its ascii representation.
1211    const size_t Count = NameRef[3] - 48;
1212
1213    GenerateRegisterCheckPatternForLoadStores(NameRef, OutTypeCode, IsQuad,
1214                                              HasDupPostfix, HasLanePostfix,
1215                                              Count, RegisterSuffix);
1216  } else if (IsTBXOrTBL) {
1217    RegisterSuffix += "d{{[0-9]+}}, {";
1218    for (size_t i = 0; i < TBNumber-1; i++) {
1219      RegisterSuffix += "d{{[0-9]+}}, ";
1220    }
1221    RegisterSuffix += "d{{[0-9]+}}}, d{{[0-9]+}}";
1222  } else {
1223    // Handle a normal instruction.
1224    if (NameRef.count("vget") || NameRef.count("vset"))
1225      return;
1226
1227    // We first normalize our proto, since we only need to emit 4
1228    // different types of checks, yet have more than 4 proto types
1229    // that map onto those 4 patterns.
1230    std::string NormalizedProto("");
1231    NormalizeProtoForRegisterPatternCreation(Name, Proto, HasNPostfix, IsQuad,
1232                                             HasLanePostfix, HasDupPostfix,
1233                                             NormalizedProto);
1234
1235    for (size_t i = 0, end = NormalizedProto.size(); i < end; i++) {
1236      const char &c = NormalizedProto[i];
1237      switch (c) {
1238      case 'q':
1239        RegisterSuffix += "q{{[0-9]+}}, ";
1240        break;
1241
1242      case 'd':
1243        RegisterSuffix += "d{{[0-9]+}}, ";
1244        break;
1245
1246      case 'i':
1247        RegisterSuffix += "#{{[0-9]+}}, ";
1248        break;
1249
1250      case 'a':
1251        RegisterSuffix += "d{{[0-9]+}}[{{[0-9]}}], ";
1252        break;
1253      }
1254    }
1255
1256    // Remove extra ", ".
1257    RegisterSuffix = RegisterSuffix.substr(0, RegisterSuffix.size()-2);
1258  }
1259}
1260
/// GenerateChecksForIntrinsic - Given a specific instruction name +
/// typestr + class kind, generate the proper set of FileCheck
/// Patterns to check for. We could just return a string, but instead
/// use a vector since it provides us with the extra flexibility of
/// emitting multiple checks, which comes in handy for certain cases
/// like mla where we want to check for 2 different instructions.
/// NOTE(review): InTypeStr is not consulted anywhere in this function;
/// confirm whether it can be removed at the call site.
static void GenerateChecksForIntrinsic(const std::string &Name,
                                       const std::string &Proto,
                                       StringRef &OutTypeStr,
                                       StringRef &InTypeStr,
                                       ClassKind Ck,
                                       const std::string &InstName,
                                       bool IsHiddenLOp,
                                       std::vector<std::string>& Result) {

  // If Ck is a ClassNoTest instruction, just return so no test is
  // emitted.
  if(Ck == ClassNoTest)
    return;

  // vcvt_f32_f16 is checked as-is; it never reaches the generic mangling
  // logic below.
  if (Name == "vcvt_f32_f16") {
    Result.push_back("vcvt.f32.f16");
    return;
  }


  // Now we preprocess our instruction given the data we have to get the
  // data that we need.
  // Create a StringRef for String Manipulation of our Name.
  const StringRef NameRef(Name);
  // Instruction Prefix.
  std::string Prefix;
  // The type code for our out type string.
  std::string OutTypeCode;
  // To handle our different cases, we need to check for different postfixes.
  // Is our instruction a quad instruction.
  bool IsQuad = false;
  // Our instruction is of the form <instructionname>_n.
  bool HasNPostfix = false;
  // Our instruction is of the form <instructionname>_lane.
  bool HasLanePostfix = false;
  // Our instruction is of the form <instructionname>_dup.
  bool HasDupPostfix  = false;
  // Our instruction is a vcvt instruction which requires special handling.
  // NOTE(review): only written by PreprocessInstruction; its value is never
  // read again in this function.
  bool IsSpecialVCvt = false;
  // If we have a vtbxN or vtblN instruction, this is set to N.
  // ((size_t)-1 wraps to the maximum value and serves as the "unset"
  // sentinel.)
  size_t TBNumber = -1;
  // Register Suffix
  std::string RegisterSuffix;

  PreprocessInstruction(NameRef, InstName, Prefix,
                        HasNPostfix, HasLanePostfix, HasDupPostfix,
                        IsSpecialVCvt, TBNumber);

  InstructionTypeCode(OutTypeStr, Ck, IsQuad, OutTypeCode);
  GenerateRegisterCheckPattern(Name, Proto, OutTypeCode, HasNPostfix, IsQuad,
                               HasLanePostfix, HasDupPostfix, TBNumber,
                               RegisterSuffix);

  // In the following section, we handle a bunch of special cases. You can tell
  // a special case by the fact we are returning early.

  // If our instruction is a logical instruction without postfix or a
  // hidden LOp just return the current Prefix.
  if (Ck == ClassL || IsHiddenLOp) {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // If we have a vmov, due to the many different cases, some of which
  // vary within the different intrinsics generated for a single
  // instruction type, just output a vmov. (e.g. given an instruction
  // A, A.u32 might be vmov and A.u8 might be vmov.8).
  //
  // FIXME: Maybe something can be done about this. The two cases that we care
  // about are vmov as an LType and vmov as a WType.
  if (Prefix == "vmov") {
    Result.push_back(Prefix + " " + RegisterSuffix);
    return;
  }

  // In the following section, we handle special cases.

  if (OutTypeCode == "64") {
    // If we have a 64 bit vdup/vext and are handling an uint64x1_t
    // type, the intrinsic will be optimized away, so just return
    // nothing.  On the other hand if we are handling an uint64x2_t
    // (i.e. quad instruction), vdup/vmov instructions should be
    // emitted.
    if (Prefix == "vdup" || Prefix == "vext") {
      if (IsQuad) {
        Result.push_back("{{vmov|vdup}}");
      }
      return;
    }

    // v{st,ld}{2,3,4}_{u,s}64 emit v{st,ld}1.64 instructions with
    // multiple register operands.
    bool MultiLoadPrefix = Prefix == "vld2" || Prefix == "vld3"
                            || Prefix == "vld4";
    bool MultiStorePrefix = Prefix == "vst2" || Prefix == "vst3"
                            || Prefix == "vst4";
    if (MultiLoadPrefix || MultiStorePrefix) {
      // Keep only the "vld"/"vst" part of the name and append "1.64".
      Result.push_back(NameRef.slice(0, 3).str() + "1.64");
      return;
    }

    // v{st,ld}1_{lane,dup}_{u64,s64} use vldr/vstr/vmov/str instead of
    // emitting said instructions. So return a check for
    // vldr/vstr/vmov/str instead.
    if (HasLanePostfix || HasDupPostfix) {
      if (Prefix == "vst1") {
        Result.push_back("{{str|vstr|vmov}}");
        return;
      } else if (Prefix == "vld1") {
        Result.push_back("{{ldr|vldr|vmov}}");
        return;
      }
    }
  }

  // vzip.32/vuzp.32 are the same instruction as vtrn.32 and are
  // sometimes disassembled as vtrn.32. We use a regex to handle both
  // cases.
  if ((Prefix == "vzip" || Prefix == "vuzp") && OutTypeCode == "32") {
    Result.push_back("{{vtrn|" + Prefix + "}}.32 " + RegisterSuffix);
    return;
  }

  // Currently on most ARM processors, we do not use vmla/vmls for
  // quad floating point operations. Instead we output vmul + vadd. So
  // check if we have one of those instructions and just output a
  // check for vmul.
  if (OutTypeCode == "f32") {
    if (Prefix == "vmls") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vsub." + OutTypeCode);
      return;
    } else if (Prefix == "vmla") {
      Result.push_back("vmul." + OutTypeCode + " " + RegisterSuffix);
      Result.push_back("vadd." + OutTypeCode);
      return;
    }
  }

  // If we have vcvt, get the input type from the instruction name
  // (which should be of the form instname_inputtype) and append it
  // before the output type.
  if (Prefix == "vcvt") {
    const std::string inTypeCode = NameRef.substr(NameRef.find_last_of("_")+1);
    Prefix += "." + inTypeCode;
  }

  // Append output type code to get our final mangled instruction.
  Prefix += "." + OutTypeCode;

  Result.push_back(Prefix + " " + RegisterSuffix);
}
1419
1420/// UseMacro - Examine the prototype string to determine if the intrinsic
1421/// should be defined as a preprocessor macro instead of an inline function.
1422static bool UseMacro(const std::string &proto) {
1423  // If this builtin takes an immediate argument, we need to #define it rather
1424  // than use a standard declaration, so that SemaChecking can range check
1425  // the immediate passed by the user.
1426  if (proto.find('i') != std::string::npos)
1427    return true;
1428
1429  // Pointer arguments need to use macros to avoid hiding aligned attributes
1430  // from the pointer type.
1431  if (proto.find('p') != std::string::npos ||
1432      proto.find('c') != std::string::npos)
1433    return true;
1434
1435  return false;
1436}
1437
1438/// MacroArgUsedDirectly - Return true if argument i for an intrinsic that is
1439/// defined as a macro should be accessed directly instead of being first
1440/// assigned to a local temporary.
1441static bool MacroArgUsedDirectly(const std::string &proto, unsigned i) {
1442  // True for constant ints (i), pointers (p) and const pointers (c).
1443  return (proto[i] == 'i' || proto[i] == 'p' || proto[i] == 'c');
1444}
1445
1446// Generate the string "(argtype a, argtype b, ...)"
1447static std::string GenArgs(const std::string &proto, StringRef typestr,
1448                           const std::string &name) {
1449  bool define = UseMacro(proto);
1450  char arg = 'a';
1451
1452  std::string s;
1453  s += "(";
1454
1455  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1456    if (define) {
1457      // Some macro arguments are used directly instead of being assigned
1458      // to local temporaries; prepend an underscore prefix to make their
1459      // names consistent with the local temporaries.
1460      if (MacroArgUsedDirectly(proto, i))
1461        s += "__";
1462    } else {
1463      s += TypeString(proto[i], typestr) + " __";
1464    }
1465    s.push_back(arg);
1466    //To avoid argument being multiple defined, add extra number for renaming.
1467    if (name == "vcopy_lane" || name == "vcopy_laneq")
1468      s.push_back('1');
1469    if ((i + 1) < e)
1470      s += ", ";
1471  }
1472
1473  s += ")";
1474  return s;
1475}
1476
1477// Macro arguments are not type-checked like inline function arguments, so
1478// assign them to local temporaries to get the right type checking.
1479static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
1480                                  const std::string &name ) {
1481  char arg = 'a';
1482  std::string s;
1483  bool generatedLocal = false;
1484
1485  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
1486    // Do not create a temporary for an immediate argument.
1487    // That would defeat the whole point of using a macro!
1488    if (MacroArgUsedDirectly(proto, i))
1489      continue;
1490    generatedLocal = true;
1491    bool extranumber = false;
1492    if (name == "vcopy_lane" || name == "vcopy_laneq")
1493      extranumber = true;
1494
1495    s += TypeString(proto[i], typestr) + " __";
1496    s.push_back(arg);
1497    if(extranumber)
1498      s.push_back('1');
1499    s += " = (";
1500    s.push_back(arg);
1501    if(extranumber)
1502      s.push_back('1');
1503    s += "); ";
1504  }
1505
1506  if (generatedLocal)
1507    s += "\\\n  ";
1508  return s;
1509}
1510
1511// Use the vmovl builtin to sign-extend or zero-extend a vector.
1512static std::string Extend(StringRef typestr, const std::string &a, bool h=0) {
1513  std::string s, high;
1514  high = h ? "_high" : "";
1515  s = MangleName("vmovl" + high, typestr, ClassS);
1516  s += "(" + a + ")";
1517  return s;
1518}
1519
1520// Get the high 64-bit part of a vector
1521static std::string GetHigh(const std::string &a, StringRef typestr) {
1522  std::string s;
1523  s = MangleName("vget_high", typestr, ClassS);
1524  s += "(" + a + ")";
1525  return s;
1526}
1527
1528// Gen operation with two operands and get high 64-bit for both of two operands.
1529static std::string Gen2OpWith2High(StringRef typestr,
1530                                   const std::string &op,
1531                                   const std::string &a,
1532                                   const std::string &b) {
1533  std::string s;
1534  std::string Op1 = GetHigh(a, typestr);
1535  std::string Op2 = GetHigh(b, typestr);
1536  s = MangleName(op, typestr, ClassS);
1537  s += "(" + Op1 + ", " + Op2 + ");";
1538  return s;
1539}
1540
1541// Gen operation with three operands and get high 64-bit of the latter
1542// two operands.
1543static std::string Gen3OpWith2High(StringRef typestr,
1544                                   const std::string &op,
1545                                   const std::string &a,
1546                                   const std::string &b,
1547                                   const std::string &c) {
1548  std::string s;
1549  std::string Op1 = GetHigh(b, typestr);
1550  std::string Op2 = GetHigh(c, typestr);
1551  s = MangleName(op, typestr, ClassS);
1552  s += "(" + a + ", " + Op1 + ", " + Op2 + ");";
1553  return s;
1554}
1555
1556// Gen combine operation by putting a on low 64-bit, and b on high 64-bit.
1557static std::string GenCombine(std::string typestr,
1558                              const std::string &a,
1559                              const std::string &b) {
1560  std::string s;
1561  s = MangleName("vcombine", typestr, ClassS);
1562  s += "(" + a + ", " + b + ")";
1563  return s;
1564}
1565
1566static std::string Duplicate(unsigned nElts, StringRef typestr,
1567                             const std::string &a) {
1568  std::string s;
1569
1570  s = "(" + TypeString('d', typestr) + "){ ";
1571  for (unsigned i = 0; i != nElts; ++i) {
1572    s += a;
1573    if ((i + 1) < nElts)
1574      s += ", ";
1575  }
1576  s += " }";
1577
1578  return s;
1579}
1580
1581static std::string SplatLane(unsigned nElts, const std::string &vec,
1582                             const std::string &lane) {
1583  std::string s = "__builtin_shufflevector(" + vec + ", " + vec;
1584  for (unsigned i = 0; i < nElts; ++i)
1585    s += ", " + lane;
1586  s += ")";
1587  return s;
1588}
1589
1590static std::string RemoveHigh(const std::string &name) {
1591  std::string s = name;
1592  std::size_t found = s.find("_high_");
1593  if (found == std::string::npos)
1594    PrintFatalError("name should contain \"_high_\" for high intrinsics");
1595  s.replace(found, 5, "");
1596  return s;
1597}
1598
1599static unsigned GetNumElements(StringRef typestr, bool &quad) {
1600  quad = false;
1601  bool dummy = false;
1602  char type = ClassifyType(typestr, quad, dummy, dummy);
1603  unsigned nElts = 0;
1604  switch (type) {
1605  case 'c': nElts = 8; break;
1606  case 's': nElts = 4; break;
1607  case 'i': nElts = 2; break;
1608  case 'l': nElts = 1; break;
1609  case 'h': nElts = 4; break;
1610  case 'f': nElts = 2; break;
1611  case 'd':
1612    nElts = 1;
1613    break;
1614  default:
1615    PrintFatalError("unhandled type!");
1616  }
1617  if (quad) nElts <<= 1;
1618  return nElts;
1619}
1620
1621// Generate the definition for this intrinsic, e.g. "a + b" for OpAdd.
1622static std::string GenOpString(const std::string &name, OpKind op,
1623                               const std::string &proto, StringRef typestr) {
1624  bool quad;
1625  unsigned nElts = GetNumElements(typestr, quad);
1626  bool define = UseMacro(proto);
1627
1628  std::string ts = TypeString(proto[0], typestr);
1629  std::string s;
1630  if (!define) {
1631    s = "return ";
1632  }
1633
1634  switch(op) {
1635  case OpAdd:
1636    s += "__a + __b;";
1637    break;
1638  case OpAddl:
1639    s += Extend(typestr, "__a") + " + " + Extend(typestr, "__b") + ";";
1640    break;
1641  case OpAddlHi:
1642    s += Extend(typestr, "__a", 1) + " + " + Extend(typestr, "__b", 1) + ";";
1643    break;
1644  case OpAddw:
1645    s += "__a + " + Extend(typestr, "__b") + ";";
1646    break;
1647  case OpAddwHi:
1648    s += "__a + " + Extend(typestr, "__b", 1) + ";";
1649    break;
1650  case OpSub:
1651    s += "__a - __b;";
1652    break;
1653  case OpSubl:
1654    s += Extend(typestr, "__a") + " - " + Extend(typestr, "__b") + ";";
1655    break;
1656  case OpSublHi:
1657    s += Extend(typestr, "__a", 1) + " - " + Extend(typestr, "__b", 1) + ";";
1658    break;
1659  case OpSubw:
1660    s += "__a - " + Extend(typestr, "__b") + ";";
1661    break;
1662  case OpSubwHi:
1663    s += "__a - " + Extend(typestr, "__b", 1) + ";";
1664    break;
1665  case OpMulN:
1666    s += "__a * " + Duplicate(nElts, typestr, "__b") + ";";
1667    break;
1668  case OpMulLane:
1669    s += "__a * " + SplatLane(nElts, "__b", "__c") + ";";
1670    break;
1671  case OpMulXLane:
1672    s += MangleName("vmulx", typestr, ClassS) + "(__a, " +
1673      SplatLane(nElts, "__b", "__c") + ");";
1674    break;
1675  case OpMul:
1676    s += "__a * __b;";
1677    break;
1678  case OpFMlaN:
1679    s += MangleName("vfma", typestr, ClassS);
1680    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1681    break;
1682  case OpFMlsN:
1683    s += MangleName("vfms", typestr, ClassS);
1684    s += "(__a, __b, " + Duplicate(nElts,typestr, "__c") + ");";
1685    break;
1686  case OpMullLane:
1687    s += MangleName("vmull", typestr, ClassS) + "(__a, " +
1688      SplatLane(nElts, "__b", "__c") + ");";
1689    break;
1690  case OpMullHiLane:
1691    s += MangleName("vmull", typestr, ClassS) + "(" +
1692      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1693    break;
1694  case OpMlaN:
1695    s += "__a + (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1696    break;
1697  case OpMlaLane:
1698    s += "__a + (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1699    break;
1700  case OpMla:
1701    s += "__a + (__b * __c);";
1702    break;
1703  case OpMlalN:
1704    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1705      Duplicate(nElts, typestr, "__c") + ");";
1706    break;
1707  case OpMlalLane:
1708    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1709      SplatLane(nElts, "__c", "__d") + ");";
1710    break;
1711  case OpMlalHiLane:
1712    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(" +
1713      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1714    break;
1715  case OpMlal:
1716    s += "__a + " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1717    break;
1718  case OpMullHi:
1719    s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
1720    break;
1721  case OpMullHiN:
1722    s += MangleName("vmull_n", typestr, ClassS);
1723    s += "(" + GetHigh("__a", typestr) + ", __b);";
1724    return s;
1725  case OpMlalHi:
1726    s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
1727    break;
1728  case OpMlalHiN:
1729    s += MangleName("vmlal_n", typestr, ClassS);
1730    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1731    return s;
1732  case OpMlsN:
1733    s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
1734    break;
1735  case OpMlsLane:
1736    s += "__a - (__b * " + SplatLane(nElts, "__c", "__d") + ");";
1737    break;
1738  case OpFMSLane:
1739    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1740    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1741    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1742    s += MangleName("vfma_lane", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1743    break;
1744  case OpFMSLaneQ:
1745    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
1746    s += TypeString(proto[2], typestr) + " __b1 = __b; \\\n  ";
1747    s += TypeString(proto[3], typestr) + " __c1 = __c; \\\n  ";
1748    s += MangleName("vfma_laneq", typestr, ClassS) + "(__a1, __b1, -__c1, __d);";
1749    break;
1750  case OpMls:
1751    s += "__a - (__b * __c);";
1752    break;
1753  case OpMlslN:
1754    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1755      Duplicate(nElts, typestr, "__c") + ");";
1756    break;
1757  case OpMlslLane:
1758    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, " +
1759      SplatLane(nElts, "__c", "__d") + ");";
1760    break;
1761  case OpMlslHiLane:
1762    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(" +
1763      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1764    break;
1765  case OpMlsl:
1766    s += "__a - " + MangleName("vmull", typestr, ClassS) + "(__b, __c);";
1767    break;
1768  case OpMlslHi:
1769    s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
1770    break;
1771  case OpMlslHiN:
1772    s += MangleName("vmlsl_n", typestr, ClassS);
1773    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
1774    break;
1775  case OpQDMullLane:
1776    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
1777      SplatLane(nElts, "__b", "__c") + ");";
1778    break;
1779  case OpQDMullHiLane:
1780    s += MangleName("vqdmull", typestr, ClassS) + "(" +
1781      GetHigh("__a", typestr) + ", " + SplatLane(nElts, "__b", "__c") + ");";
1782    break;
1783  case OpQDMlalLane:
1784    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, __b, " +
1785      SplatLane(nElts, "__c", "__d") + ");";
1786    break;
1787  case OpQDMlalHiLane:
1788    s += MangleName("vqdmlal", typestr, ClassS) + "(__a, " +
1789      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1790    break;
1791  case OpQDMlslLane:
1792    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, __b, " +
1793      SplatLane(nElts, "__c", "__d") + ");";
1794    break;
1795  case OpQDMlslHiLane:
1796    s += MangleName("vqdmlsl", typestr, ClassS) + "(__a, " +
1797      GetHigh("__b", typestr) + ", " + SplatLane(nElts, "__c", "__d") + ");";
1798    break;
1799  case OpQDMulhLane:
1800    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
1801      SplatLane(nElts, "__b", "__c") + ");";
1802    break;
1803  case OpQRDMulhLane:
1804    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
1805      SplatLane(nElts, "__b", "__c") + ");";
1806    break;
1807  case OpEq:
1808    s += "(" + ts + ")(__a == __b);";
1809    break;
1810  case OpGe:
1811    s += "(" + ts + ")(__a >= __b);";
1812    break;
1813  case OpLe:
1814    s += "(" + ts + ")(__a <= __b);";
1815    break;
1816  case OpGt:
1817    s += "(" + ts + ")(__a > __b);";
1818    break;
1819  case OpLt:
1820    s += "(" + ts + ")(__a < __b);";
1821    break;
1822  case OpNeg:
1823    s += " -__a;";
1824    break;
1825  case OpNot:
1826    s += " ~__a;";
1827    break;
1828  case OpAnd:
1829    s += "__a & __b;";
1830    break;
1831  case OpOr:
1832    s += "__a | __b;";
1833    break;
1834  case OpXor:
1835    s += "__a ^ __b;";
1836    break;
1837  case OpAndNot:
1838    s += "__a & ~__b;";
1839    break;
1840  case OpOrNot:
1841    s += "__a | ~__b;";
1842    break;
1843  case OpCast:
1844    s += "(" + ts + ")__a;";
1845    break;
1846  case OpConcat:
1847    s += "(" + ts + ")__builtin_shufflevector((int64x1_t)__a";
1848    s += ", (int64x1_t)__b, 0, 1);";
1849    break;
1850  case OpHi:
1851    // nElts is for the result vector, so the source is twice that number.
1852    s += "__builtin_shufflevector(__a, __a";
1853    for (unsigned i = nElts; i < nElts * 2; ++i)
1854      s += ", " + utostr(i);
1855    s+= ");";
1856    break;
1857  case OpLo:
1858    s += "__builtin_shufflevector(__a, __a";
1859    for (unsigned i = 0; i < nElts; ++i)
1860      s += ", " + utostr(i);
1861    s+= ");";
1862    break;
1863  case OpDup:
1864    s += Duplicate(nElts, typestr, "__a") + ";";
1865    break;
1866  case OpDupLane:
1867    s += SplatLane(nElts, "__a", "__b") + ";";
1868    break;
1869  case OpSelect:
1870    // ((0 & 1) | (~0 & 2))
1871    s += "(" + ts + ")";
1872    ts = TypeString(proto[1], typestr);
1873    s += "((__a & (" + ts + ")__b) | ";
1874    s += "(~__a & (" + ts + ")__c));";
1875    break;
1876  case OpRev16:
1877    s += "__builtin_shufflevector(__a, __a";
1878    for (unsigned i = 2; i <= nElts; i += 2)
1879      for (unsigned j = 0; j != 2; ++j)
1880        s += ", " + utostr(i - j - 1);
1881    s += ");";
1882    break;
1883  case OpRev32: {
1884    unsigned WordElts = nElts >> (1 + (int)quad);
1885    s += "__builtin_shufflevector(__a, __a";
1886    for (unsigned i = WordElts; i <= nElts; i += WordElts)
1887      for (unsigned j = 0; j != WordElts; ++j)
1888        s += ", " + utostr(i - j - 1);
1889    s += ");";
1890    break;
1891  }
1892  case OpRev64: {
1893    unsigned DblWordElts = nElts >> (int)quad;
1894    s += "__builtin_shufflevector(__a, __a";
1895    for (unsigned i = DblWordElts; i <= nElts; i += DblWordElts)
1896      for (unsigned j = 0; j != DblWordElts; ++j)
1897        s += ", " + utostr(i - j - 1);
1898    s += ");";
1899    break;
1900  }
1901  case OpXtnHi: {
1902    s = TypeString(proto[1], typestr) + " __a1 = " +
1903        MangleName("vmovn", typestr, ClassS) + "(__b);\n  " +
1904        "return __builtin_shufflevector(__a, __a1";
1905    for (unsigned i = 0; i < nElts * 4; ++i)
1906      s += ", " + utostr(i);
1907    s += ");";
1908    break;
1909  }
1910  case OpSqxtunHi: {
1911    s = TypeString(proto[1], typestr) + " __a1 = " +
1912        MangleName("vqmovun", typestr, ClassS) + "(__b);\n  " +
1913        "return __builtin_shufflevector(__a, __a1";
1914    for (unsigned i = 0; i < nElts * 4; ++i)
1915      s += ", " + utostr(i);
1916    s += ");";
1917    break;
1918  }
1919  case OpQxtnHi: {
1920    s = TypeString(proto[1], typestr) + " __a1 = " +
1921        MangleName("vqmovn", typestr, ClassS) + "(__b);\n  " +
1922        "return __builtin_shufflevector(__a, __a1";
1923    for (unsigned i = 0; i < nElts * 4; ++i)
1924      s += ", " + utostr(i);
1925    s += ");";
1926    break;
1927  }
1928  case OpFcvtnHi: {
1929    std::string FName = (nElts == 1) ? "vcvt_f32" : "vcvt_f16";
1930    s = TypeString(proto[1], typestr) + " __a1 = " +
1931        MangleName(FName, typestr, ClassS) + "(__b);\n  " +
1932        "return __builtin_shufflevector(__a, __a1";
1933    for (unsigned i = 0; i < nElts * 4; ++i)
1934      s += ", " + utostr(i);
1935    s += ");";
1936    break;
1937  }
1938  case OpFcvtlHi: {
1939    std::string FName = (nElts == 2) ? "vcvt_f64" : "vcvt_f32";
1940    s = TypeString('d', typestr) + " __a1 = " + GetHigh("__a", typestr) +
1941        ";\n  return " + MangleName(FName, typestr, ClassS) + "(__a1);";
1942    break;
1943  }
1944  case OpFcvtxnHi: {
1945    s = TypeString(proto[1], typestr) + " __a1 = " +
1946        MangleName("vcvtx_f32", typestr, ClassS) + "(__b);\n  " +
1947        "return __builtin_shufflevector(__a, __a1";
1948    for (unsigned i = 0; i < nElts * 4; ++i)
1949      s += ", " + utostr(i);
1950    s += ");";
1951    break;
1952  }
1953  case OpUzp1:
1954    s += "__builtin_shufflevector(__a, __b";
1955    for (unsigned i = 0; i < nElts; i++)
1956      s += ", " + utostr(2*i);
1957    s += ");";
1958    break;
1959  case OpUzp2:
1960    s += "__builtin_shufflevector(__a, __b";
1961    for (unsigned i = 0; i < nElts; i++)
1962      s += ", " + utostr(2*i+1);
1963    s += ");";
1964    break;
1965  case OpZip1:
1966    s += "__builtin_shufflevector(__a, __b";
1967    for (unsigned i = 0; i < (nElts/2); i++)
1968       s += ", " + utostr(i) + ", " + utostr(i+nElts);
1969    s += ");";
1970    break;
1971  case OpZip2:
1972    s += "__builtin_shufflevector(__a, __b";
1973    for (unsigned i = nElts/2; i < nElts; i++)
1974       s += ", " + utostr(i) + ", " + utostr(i+nElts);
1975    s += ");";
1976    break;
1977  case OpTrn1:
1978    s += "__builtin_shufflevector(__a, __b";
1979    for (unsigned i = 0; i < (nElts/2); i++)
1980       s += ", " + utostr(2*i) + ", " + utostr(2*i+nElts);
1981    s += ");";
1982    break;
1983  case OpTrn2:
1984    s += "__builtin_shufflevector(__a, __b";
1985    for (unsigned i = 0; i < (nElts/2); i++)
1986       s += ", " + utostr(2*i+1) + ", " + utostr(2*i+1+nElts);
1987    s += ");";
1988    break;
1989  case OpAbdl: {
1990    std::string abd = MangleName("vabd", typestr, ClassS) + "(__a, __b)";
1991    if (typestr[0] != 'U') {
1992      // vabd results are always unsigned and must be zero-extended.
1993      std::string utype = "U" + typestr.str();
1994      s += "(" + TypeString(proto[0], typestr) + ")";
1995      abd = "(" + TypeString('d', utype) + ")" + abd;
1996      s += Extend(utype, abd) + ";";
1997    } else {
1998      s += Extend(typestr, abd) + ";";
1999    }
2000    break;
2001  }
2002  case OpAbdlHi:
2003    s += Gen2OpWith2High(typestr, "vabdl", "__a", "__b");
2004    break;
2005  case OpAddhnHi: {
2006    std::string addhn = MangleName("vaddhn", typestr, ClassS) + "(__b, __c)";
2007    s += GenCombine(GetNarrowTypestr(typestr), "__a", addhn);
2008    s += ";";
2009    break;
2010  }
2011  case OpRAddhnHi: {
2012    std::string raddhn = MangleName("vraddhn", typestr, ClassS) + "(__b, __c)";
2013    s += GenCombine(GetNarrowTypestr(typestr), "__a", raddhn);
2014    s += ";";
2015    break;
2016  }
2017  case OpSubhnHi: {
2018    std::string subhn = MangleName("vsubhn", typestr, ClassS) + "(__b, __c)";
2019    s += GenCombine(GetNarrowTypestr(typestr), "__a", subhn);
2020    s += ";";
2021    break;
2022  }
2023  case OpRSubhnHi: {
2024    std::string rsubhn = MangleName("vrsubhn", typestr, ClassS) + "(__b, __c)";
2025    s += GenCombine(GetNarrowTypestr(typestr), "__a", rsubhn);
2026    s += ";";
2027    break;
2028  }
2029  case OpAba:
2030    s += "__a + " + MangleName("vabd", typestr, ClassS) + "(__b, __c);";
2031    break;
2032  case OpAbal:
2033    s += "__a + " + MangleName("vabdl", typestr, ClassS) + "(__b, __c);";
2034    break;
2035  case OpAbalHi:
2036    s += Gen3OpWith2High(typestr, "vabal", "__a", "__b", "__c");
2037    break;
2038  case OpQDMullHi:
2039    s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
2040    break;
2041  case OpQDMullHiN:
2042    s += MangleName("vqdmull_n", typestr, ClassS);
2043    s += "(" + GetHigh("__a", typestr) + ", __b);";
2044    return s;
2045  case OpQDMlalHi:
2046    s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
2047    break;
2048  case OpQDMlalHiN:
2049    s += MangleName("vqdmlal_n", typestr, ClassS);
2050    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2051    return s;
2052  case OpQDMlslHi:
2053    s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
2054    break;
2055  case OpQDMlslHiN:
2056    s += MangleName("vqdmlsl_n", typestr, ClassS);
2057    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
2058    return s;
2059  case OpDiv:
2060    s += "__a / __b;";
2061    break;
2062  case OpMovlHi: {
2063    s = TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2064        MangleName("vget_high", typestr, ClassS) + "(__a);\n  " + s;
2065    s += "(" + ts + ")" + MangleName("vshll_n", typestr, ClassS);
2066    s += "(__a1, 0);";
2067    break;
2068  }
2069  case OpLongHi: {
2070    // Another local variable __a1 is needed for calling a Macro,
2071    // or using __a will have naming conflict when Macro expanding.
2072    s += TypeString(proto[1], typestr.drop_front()) + " __a1 = " +
2073         MangleName("vget_high", typestr, ClassS) + "(__a); \\\n";
2074    s += "  (" + ts + ")" + MangleName(RemoveHigh(name), typestr, ClassS) +
2075         "(__a1, __b);";
2076    break;
2077  }
2078  case OpNarrowHi: {
2079    s += "(" + ts + ")" + MangleName("vcombine", typestr, ClassS) + "(__a, " +
2080         MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
2081    break;
2082  }
2083  case OpCopyLane: {
2084    s += TypeString('s', typestr) + " __c2 = " +
2085         MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
2086         MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
2087    break;
2088  }
2089  case OpCopyQLane: {
2090    std::string typeCode = "";
2091    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2092    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
2093         "(__c1, __d1); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
2094    break;
2095  }
2096  case OpCopyLaneQ: {
2097    std::string typeCode = "";
2098    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2099    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
2100         "(__c1, __d1); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b1);";
2101    break;
2102  }
2103  case OpScalarMulLane: {
2104    std::string typeCode = "";
2105    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2106    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2107      "(__b, __c);\\\n  __a * __d1;";
2108    break;
2109  }
2110  case OpScalarMulLaneQ: {
2111    std::string typeCode = "";
2112    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2113        s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
2114          "(__b, __c);\\\n  __a * __d1;";
2115    break;
2116  }
2117  case OpScalarMulXLane: {
2118    bool dummy = false;
2119    char type = ClassifyType(typestr, dummy, dummy, dummy);
2120    if (type == 'f') type = 's';
2121    std::string typeCode = "";
2122    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2123    s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
2124      "(__b, __c);\\\n  vmulx" + type + "_" +
2125      typeCode +  "(__a, __d1);";
2126    break;
2127  }
2128  case OpScalarMulXLaneQ: {
2129    bool dummy = false;
2130    char type = ClassifyType(typestr, dummy, dummy, dummy);
2131    if (type == 'f') type = 's';
2132    std::string typeCode = "";
2133    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2134    s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
2135      typeCode + "(__b, __c);\\\n  vmulx" + type +
2136      "_" + typeCode +  "(__a, __d1);";
2137    break;
2138  }
2139
2140  case OpScalarVMulXLane: {
2141    bool dummy = false;
2142    char type = ClassifyType(typestr, dummy, dummy, dummy);
2143    if (type == 'f') type = 's';
2144    std::string typeCode = "";
2145    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2146    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2147      typeCode + "(__a, 0);\\\n" +
2148      "  " + TypeString('s', typestr) + " __e1 = vget_lane_" +
2149      typeCode + "(__b, __c);\\\n" +
2150      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2151      typeCode + "(__d1, __e1);\\\n" +
2152      "  " + TypeString('d', typestr) + " __g1;\\\n" +
2153      "  vset_lane_" + typeCode + "(__f1, __g1, __c);";
2154    break;
2155  }
2156
2157  case OpScalarVMulXLaneQ: {
2158    bool dummy = false;
2159    char type = ClassifyType(typestr, dummy, dummy, dummy);
2160    if (type == 'f') type = 's';
2161    std::string typeCode = "";
2162    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2163    s += TypeString('s', typestr) + " __d1 = vget_lane_" +
2164      typeCode + "(__a, 0);\\\n" +
2165      "  " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
2166      typeCode + "(__b, __c);\\\n" +
2167      "  " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
2168      typeCode + "(__d1, __e1);\\\n" +
2169      "  " + TypeString('d', typestr) + " __g1;\\\n" +
2170      "  vset_lane_" + typeCode + "(__f1, __g1, 0);";
2171    break;
2172  }
2173  case OpScalarQDMullLane: {
2174    std::string typeCode = "";
2175    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2176    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2177    "vget_lane_" + typeCode + "(b, __c));";
2178    break;
2179  }
2180  case OpScalarQDMullLaneQ: {
2181    std::string typeCode = "";
2182    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2183    s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
2184    "vgetq_lane_" + typeCode + "(b, __c));";
2185    break;
2186  }
2187  case OpScalarQDMulHiLane: {
2188    std::string typeCode = "";
2189    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2190    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2191    "vget_lane_" + typeCode + "(__b, __c));";
2192    break;
2193  }
2194  case OpScalarQDMulHiLaneQ: {
2195    std::string typeCode = "";
2196    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2197    s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
2198    "vgetq_lane_" + typeCode + "(__b, __c));";
2199    break;
2200  }
2201  case OpScalarQRDMulHiLane: {
2202    std::string typeCode = "";
2203    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2204    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2205    "vget_lane_" + typeCode + "(__b, __c));";
2206    break;
2207  }
2208  case OpScalarQRDMulHiLaneQ: {
2209    std::string typeCode = "";
2210    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2211    s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
2212    "vgetq_lane_" + typeCode + "(__b, __c));";
2213    break;
2214  }
2215  case OpScalarGetLane:{
2216    std::string typeCode = "";
2217    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2218    if (quad) {
2219     s += "int16x8_t __a1 = vreinterpretq_s16_f16(__a);\\\n";
2220     s += "  vgetq_lane_s16(__a1, __b);";
2221    } else {
2222     s += "int16x4_t __a1 = vreinterpret_s16_f16(__a);\\\n";
2223     s += "  vget_lane_s16(__a1, __b);";
2224    }
2225    break;
2226  }
2227  case OpScalarSetLane:{
2228    std::string typeCode = "";
2229    InstructionTypeCode(typestr, ClassS, quad, typeCode);
2230    s += "int16_t __a1 = (int16_t)__a;\\\n";
2231    if (quad) {
2232     s += "  int16x8_t __b1 = vreinterpretq_s16_f16(b);\\\n";
2233     s += "  int16x8_t __b2 = vsetq_lane_s16(__a1, __b1, __c);\\\n";
2234     s += "  vreinterpretq_f16_s16(__b2);";
2235    } else {
2236     s += "  int16x4_t __b1 = vreinterpret_s16_f16(b);\\\n";
2237     s += "  int16x4_t __b2 = vset_lane_s16(__a1, __b1, __c);\\\n";
2238     s += "  vreinterpret_f16_s16(__b2);";
2239    }
2240    break;
2241  }
2242
2243  default:
2244    PrintFatalError("unknown OpKind!");
2245  }
2246  return s;
2247}
2248
2249static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
2250  unsigned mod = proto[0];
2251
2252  if (mod == 'v' || mod == 'f' || mod == 'F')
2253    mod = proto[1];
2254
2255  bool quad = false;
2256  bool poly = false;
2257  bool usgn = false;
2258  bool scal = false;
2259  bool cnst = false;
2260  bool pntr = false;
2261
2262  // Base type to get the type string for.
2263  char type = ClassifyType(typestr, quad, poly, usgn);
2264
2265  // Based on the modifying character, change the type and width if necessary.
2266  type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
2267
2268  NeonTypeFlags::EltType ET;
2269  switch (type) {
2270    case 'c':
2271      ET = poly ? NeonTypeFlags::Poly8 : NeonTypeFlags::Int8;
2272      break;
2273    case 's':
2274      ET = poly ? NeonTypeFlags::Poly16 : NeonTypeFlags::Int16;
2275      break;
2276    case 'i':
2277      ET = NeonTypeFlags::Int32;
2278      break;
2279    case 'l':
2280      ET = poly ? NeonTypeFlags::Poly64 : NeonTypeFlags::Int64;
2281      break;
2282    case 'h':
2283      ET = NeonTypeFlags::Float16;
2284      break;
2285    case 'f':
2286      ET = NeonTypeFlags::Float32;
2287      break;
2288    case 'd':
2289      ET = NeonTypeFlags::Float64;
2290      break;
2291    default:
2292      PrintFatalError("unhandled type!");
2293  }
2294  NeonTypeFlags Flags(ET, usgn, quad && proto[1] != 'g');
2295  return Flags.getFlags();
2296}
2297
// We don't check 'a' in this function, because for builtin function the
// argument matching to 'a' uses a vector type splatted from a scalar type.
//
// Returns true if the prototype contains any modifier denoting a scalar
// operand with the element type of the vectors.  Takes the string by
// const reference (the original copied it) and replaces seven separate
// find() scans with a single find_first_of() pass over the same set.
static bool ProtoHasScalar(const std::string &proto)
{
  return proto.find_first_of("szrb$yo") != std::string::npos;
}
2310
// Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
// Returns the body text (statements only, no braces) that forwards the
// user-visible intrinsic to the corresponding __builtin_neon_* call,
// inserting casts, splats and sret plumbing as required.
static std::string GenBuiltin(const std::string &name, const std::string &proto,
                              StringRef typestr, ClassKind ck) {
  std::string s;

  // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  // sret-like argument.
  bool sret = IsMultiVecProto(proto[0]);

  // NOTE(review): presumably true when the intrinsic must be emitted as a
  // macro (e.g. it has immediate operands) — confirm against UseMacro.
  bool define = UseMacro(proto);

  // Check if the prototype has a scalar operand with the type of the vector
  // elements.  If not, bitcasting the args will take care of arg checking.
  // The actual signedness etc. will be taken care of with special enums.
  if (!ProtoHasScalar(proto))
    ck = ClassB;

  // Emit the return handling: a cast of the builtin's result for direct
  // returns, or a declaration of the sret temporary 'r'.
  if (proto[0] != 'v') {
    std::string ts = TypeString(proto[0], typestr);

    if (define) {
      // In a macro the statement-expression's value is the result, so no
      // 'return' keyword is emitted.
      if (sret)
        s += ts + " r; ";
      else
        s += "(" + ts + ")";
    } else if (sret) {
      s += ts + " r; ";
    } else {
      s += "return (" + ts + ")";
    }
  }

  // 'a' in the prototype marks an "_n" variant whose scalar operand is
  // splatted to a vector before calling the non-splat builtin.
  bool splat = proto.find('a') != std::string::npos;

  s += "__builtin_neon_";
  if (splat) {
    // Call the non-splat builtin: chop off the "_n" suffix from the name.
    std::string vname(name, 0, name.size()-2);
    s += MangleName(vname, typestr, ck);
  } else {
    s += MangleName(name, typestr, ck);
  }
  s += "(";

  // Pass the address of the return variable as the first argument to sret-like
  // builtins.
  if (sret)
    s += "&r, ";

  // Arguments are named __a, __b, ... in prototype order (proto[0] is the
  // return type, so the loop starts at 1).
  char arg = 'a';
  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
    std::string args = std::string(&arg, 1);

    // Use the local temporaries instead of the macro arguments.
    args = "__" + args;

    bool argQuad = false;
    bool argPoly = false;
    bool argUsgn = false;
    bool argScalar = false;
    bool dummy = false;
    char argType = ClassifyType(typestr, argQuad, argPoly, argUsgn);
    argType = ModType(proto[i], argType, argQuad, argPoly, argUsgn, argScalar,
                      dummy, dummy);

    // Handle multiple-vector values specially, emitting each subvector as an
    // argument to the __builtin.
    // '2'..'4' and 'B'..'D' both encode 2-4 vector structs.
    unsigned NumOfVec = 0;
    if (proto[i] >= '2' && proto[i] <= '4') {
      NumOfVec = proto[i] - '0';
    } else if (proto[i] >= 'B' && proto[i] <= 'D') {
      NumOfVec = proto[i] - 'A' + 1;
    }

    if (NumOfVec > 0) {
      // Check if an explicit cast is needed.
      if (argType != 'c' || argPoly || argUsgn)
        args = (argQuad ? "(int8x16_t)" : "(int8x8_t)") + args;

      // Pass each member of the struct-of-vectors as a separate argument.
      for (unsigned vi = 0, ve = NumOfVec; vi != ve; ++vi) {
        s += args + ".val[" + utostr(vi) + "]";
        if ((vi + 1) < ve)
          s += ", ";
      }
      if ((i + 1) < e)
        s += ", ";

      continue;
    }

    // For "_n" variants the last operand is the scalar to splat.
    if (splat && (i + 1) == e)
      args = Duplicate(GetNumElements(typestr, argQuad), typestr, args);

    // Check if an explicit cast is needed.
    if ((splat || !argScalar) &&
        ((ck == ClassB && argType != 'c') || argPoly || argUsgn)) {
      // ClassB builtins are declared with int8 vectors, so everything else
      // must be cast down; other classes cast to the signed equivalent.
      std::string argTypeStr = "c";
      if (ck != ClassB)
        argTypeStr = argType;
      if (argQuad)
        argTypeStr = "Q" + argTypeStr;
      args = "(" + TypeString('d', argTypeStr) + ")" + args;
    }

    s += args;
    if ((i + 1) < e)
      s += ", ";
  }

  // Extra constant integer to hold type class enum for this function, e.g. s8
  if (ck == ClassB)
    s += ", " + utostr(GetNeonEnum(proto, typestr));

  s += ");";

  // For sret builtins, yield the temporary: a trailing expression in the
  // macro's statement-expression, or an explicit return in a function.
  if (proto[0] != 'v' && sret) {
    if (define)
      s += " r;";
    else
      s += " return r;";
  }
  return s;
}
2434
2435static std::string GenBuiltinDef(const std::string &name,
2436                                 const std::string &proto,
2437                                 StringRef typestr, ClassKind ck) {
2438  std::string s("BUILTIN(__builtin_neon_");
2439
2440  // If all types are the same size, bitcasting the args will take care
2441  // of arg checking.  The actual signedness etc. will be taken care of with
2442  // special enums.
2443  if (!ProtoHasScalar(proto))
2444    ck = ClassB;
2445
2446  s += MangleName(name, typestr, ck);
2447  s += ", \"";
2448
2449  for (unsigned i = 0, e = proto.size(); i != e; ++i)
2450    s += BuiltinTypeString(proto[i], typestr, ck, i == 0);
2451
2452  // Extra constant integer to hold type class enum for this function, e.g. s8
2453  if (ck == ClassB)
2454    s += "i";
2455
2456  s += "\", \"n\")";
2457  return s;
2458}
2459
2460static std::string GenIntrinsic(const std::string &name,
2461                                const std::string &proto,
2462                                StringRef outTypeStr, StringRef inTypeStr,
2463                                OpKind kind, ClassKind classKind) {
2464  assert(!proto.empty() && "");
2465  bool define = UseMacro(proto) && kind != OpUnavailable;
2466  std::string s;
2467
2468  // static always inline + return type
2469  if (define)
2470    s += "#define ";
2471  else
2472    s += "__ai " + TypeString(proto[0], outTypeStr) + " ";
2473
2474  // Function name with type suffix
2475  std::string mangledName = MangleName(name, outTypeStr, ClassS);
2476  if (outTypeStr != inTypeStr) {
2477    // If the input type is different (e.g., for vreinterpret), append a suffix
2478    // for the input type.  String off a "Q" (quad) prefix so that MangleName
2479    // does not insert another "q" in the name.
2480    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
2481    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
2482    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
2483  }
2484  s += mangledName;
2485
2486  // Function arguments
2487  s += GenArgs(proto, inTypeStr, name);
2488
2489  // Definition.
2490  if (define) {
2491    s += " __extension__ ({ \\\n  ";
2492    s += GenMacroLocals(proto, inTypeStr, name);
2493  } else if (kind == OpUnavailable) {
2494    s += " __attribute__((unavailable));\n";
2495    return s;
2496  } else
2497    s += " {\n  ";
2498
2499  if (kind != OpNone)
2500    s += GenOpString(name, kind, proto, outTypeStr);
2501  else
2502    s += GenBuiltin(name, proto, outTypeStr, classKind);
2503  if (define)
2504    s += " })";
2505  else
2506    s += " }";
2507  s += "\n";
2508  return s;
2509}
2510
/// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
/// is comprised of type definitions and function declarations.
void NeonEmitter::run(raw_ostream &OS) {
  // Emit the license banner as a C block comment at the top of the header.
  OS <<
    "/*===---- arm_neon.h - ARM Neon intrinsics ------------------------------"
    "---===\n"
    " *\n"
    " * Permission is hereby granted, free of charge, to any person obtaining "
    "a copy\n"
    " * of this software and associated documentation files (the \"Software\"),"
    " to deal\n"
    " * in the Software without restriction, including without limitation the "
    "rights\n"
    " * to use, copy, modify, merge, publish, distribute, sublicense, "
    "and/or sell\n"
    " * copies of the Software, and to permit persons to whom the Software is\n"
    " * furnished to do so, subject to the following conditions:\n"
    " *\n"
    " * The above copyright notice and this permission notice shall be "
    "included in\n"
    " * all copies or substantial portions of the Software.\n"
    " *\n"
    " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
    "EXPRESS OR\n"
    " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
    "MERCHANTABILITY,\n"
    " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
    "SHALL THE\n"
    " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
    "OTHER\n"
    " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
    "ARISING FROM,\n"
    " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
    "DEALINGS IN\n"
    " * THE SOFTWARE.\n"
    " *\n"
    " *===--------------------------------------------------------------------"
    "---===\n"
    " */\n\n";

  // Include guard and NEON feature check.
  OS << "#ifndef __ARM_NEON_H\n";
  OS << "#define __ARM_NEON_H\n\n";

  OS << "#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)\n";
  OS << "#error \"NEON support not enabled\"\n";
  OS << "#endif\n\n";

  OS << "#include <stdint.h>\n\n";

  // Emit NEON-specific scalar typedefs.
  OS << "typedef float float32_t;\n";
  OS << "typedef __fp16 float16_t;\n";

  // float64_t exists only on AArch64.
  OS << "#ifdef __aarch64__\n";
  OS << "typedef double float64_t;\n";
  OS << "#endif\n\n";

  // For now, signedness of polynomial types depends on target
  OS << "#ifdef __aarch64__\n";
  OS << "typedef uint8_t poly8_t;\n";
  OS << "typedef uint16_t poly16_t;\n";
  OS << "typedef uint64_t poly64_t;\n";
  OS << "#else\n";
  OS << "typedef int8_t poly8_t;\n";
  OS << "typedef int16_t poly16_t;\n";
  OS << "#endif\n";

  // Emit Neon vector typedefs.
  // The encoded string lists every element type, each in its 64-bit and
  // quad ("Q"-prefixed) form.
  std::string TypedefTypes(
      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
  SmallVector<StringRef, 24> TDTypeVec;
  ParseTypes(0, TypedefTypes, TDTypeVec);

  // Emit vector typedefs.
  // float64 and poly64 vectors are AArch64-only; track whether the previous
  // typedef was inside an #ifdef __aarch64__ block so the guard is opened
  // (preinsert) and closed (postinsert) exactly at the transitions.
  bool isA64 = false;
  bool preinsert;
  bool postinsert;
  for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
    bool dummy, quad = false, poly = false;
    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
    preinsert = false;
    postinsert = false;

    if (type == 'd' || (type == 'l' && poly)) {
      preinsert = isA64? false: true;
      isA64 = true;
    } else {
      postinsert = isA64? true: false;
      isA64 = false;
    }
    if (postinsert)
      OS << "#endif\n";
    if (preinsert)
      OS << "#ifdef __aarch64__\n";

    if (poly)
      OS << "typedef __attribute__((neon_polyvector_type(";
    else
      OS << "typedef __attribute__((neon_vector_type(";

    unsigned nElts = GetNumElements(TDTypeVec[i], quad);
    OS << utostr(nElts) << "))) ";
    // Pad single-digit element counts so the typedefs line up.
    if (nElts < 10)
      OS << " ";

    OS << TypeString('s', TDTypeVec[i]);
    OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";

  }
  // Close a guard left open by a trailing AArch64-only typedef.
  postinsert = isA64? true: false;
  if (postinsert)
    OS << "#endif\n";
  OS << "\n";

  // Emit struct typedefs.
  // One struct-of-N-vectors type (e.g. int8x8x2_t) for N in 2..4, with the
  // same #ifdef __aarch64__ guard tracking as above.
  isA64 = false;
  for (unsigned vi = 2; vi != 5; ++vi) {
    for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
      bool dummy, quad = false, poly = false;
      char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
      preinsert = false;
      postinsert = false;

      if (type == 'd' || (type == 'l' && poly)) {
        preinsert = isA64? false: true;
        isA64 = true;
      } else {
        postinsert = isA64? true: false;
        isA64 = false;
      }
      if (postinsert)
        OS << "#endif\n";
      if (preinsert)
        OS << "#ifdef __aarch64__\n";

      std::string ts = TypeString('d', TDTypeVec[i]);
      std::string vs = TypeString('0' + vi, TDTypeVec[i]);
      OS << "typedef struct " << vs << " {\n";
      OS << "  " << ts << " val";
      OS << "[" << utostr(vi) << "]";
      OS << ";\n} ";
      OS << vs << ";\n";
      OS << "\n";
    }
  }
  postinsert = isA64? true: false;
  if (postinsert)
    OS << "#endif\n";
  OS << "\n";

  OS<<"#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n\n";

  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");

  // Maps each emitted definition string to its class so duplicates are
  // suppressed across the ARM and AArch64 passes below.
  StringMap<ClassKind> EmittedMap;

  // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
  // intrinsics.  (Some of the saturating multiply instructions are also
  // used to implement the corresponding "_lane" variants, but tablegen
  // sorts the records into alphabetical order so that the "_lane" variants
  // come after the intrinsics they use.)
  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL"), EmittedMap);

  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
  // common intrinsics appear only once in the output stream.
  // The check for uniqueness is done in emitIntrinsic.
  // Emit ARM intrinsics.
  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip AArch64 intrinsics; they will be emitted at the end.
    // (This 'isA64' intentionally shadows the typedef-tracking flag above.)
    bool isA64 = R->getValueAsBit("isA64");
    if (isA64)
      continue;

    // VMOVL, VMULL and VABD were already emitted above; VABDL appears in RV
    // too, but re-emitting it is harmless since EmittedMap deduplicates.
    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
      emitIntrinsic(OS, R, EmittedMap);
  }

  // Emit AArch64-specific intrinsics.
  OS << "#ifdef __aarch64__\n";

  // As above, emit the "_high" building blocks before anything that uses them.
  emitIntrinsic(OS, Records.getDef("VMOVL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VMULL_HIGH"), EmittedMap);
  emitIntrinsic(OS, Records.getDef("VABDL_HIGH"), EmittedMap);

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Skip ARM intrinsics already included above.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64)
      continue;

    // Skip crypto temporarily, and will emit them all together at the end.
    bool isCrypto = R->getValueAsBit("isCrypto");
    if (isCrypto)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  // Crypto intrinsics live behind their own feature macro.
  OS << "#ifdef __ARM_FEATURE_CRYPTO\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];

    // Now emit only the crypto intrinsics deferred above.
    bool isCrypto = R->getValueAsBit("isCrypto");
    if (!isCrypto)
      continue;

    emitIntrinsic(OS, R, EmittedMap);
  }

  OS << "#endif\n\n";

  OS << "#endif\n\n";

  OS << "#undef __ai\n\n";
  OS << "#endif /* __ARM_NEON_H */\n";
}
2737
2738/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
2739/// intrinsics specified by record R checking for intrinsic uniqueness.
2740void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
2741                                StringMap<ClassKind> &EmittedMap) {
2742  std::string name = R->getValueAsString("Name");
2743  std::string Proto = R->getValueAsString("Prototype");
2744  std::string Types = R->getValueAsString("Types");
2745
2746  SmallVector<StringRef, 16> TypeVec;
2747  ParseTypes(R, Types, TypeVec);
2748
2749  OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
2750
2751  ClassKind classKind = ClassNone;
2752  if (R->getSuperClasses().size() >= 2)
2753    classKind = ClassMap[R->getSuperClasses()[1]];
2754  if (classKind == ClassNone && kind == OpNone)
2755    PrintFatalError(R->getLoc(), "Builtin has no class kind");
2756
2757  for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2758    if (kind == OpReinterpret) {
2759      bool outQuad = false;
2760      bool dummy = false;
2761      (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
2762      for (unsigned srcti = 0, srcte = TypeVec.size();
2763           srcti != srcte; ++srcti) {
2764        bool inQuad = false;
2765        (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
2766        if (srcti == ti || inQuad != outQuad)
2767          continue;
2768        std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
2769                                     OpCast, ClassS);
2770        if (EmittedMap.count(s))
2771          continue;
2772        EmittedMap[s] = ClassS;
2773        OS << s;
2774      }
2775    } else {
2776      std::string s =
2777          GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
2778      if (EmittedMap.count(s))
2779        continue;
2780      EmittedMap[s] = classKind;
2781      OS << s;
2782    }
2783  }
2784  OS << "\n";
2785}
2786
2787static unsigned RangeFromType(const char mod, StringRef typestr) {
2788  // base type to get the type string for.
2789  bool quad = false, dummy = false;
2790  char type = ClassifyType(typestr, quad, dummy, dummy);
2791  type = ModType(mod, type, quad, dummy, dummy, dummy, dummy, dummy);
2792
2793  switch (type) {
2794    case 'c':
2795      return (8 << (int)quad) - 1;
2796    case 'h':
2797    case 's':
2798      return (4 << (int)quad) - 1;
2799    case 'f':
2800    case 'i':
2801      return (2 << (int)quad) - 1;
2802    case 'd':
2803    case 'l':
2804      return (1 << (int)quad) - 1;
2805    default:
2806      PrintFatalError("unhandled type!");
2807  }
2808}
2809
2810static unsigned RangeScalarShiftImm(const char mod, StringRef typestr) {
2811  // base type to get the type string for.
2812  bool dummy = false;
2813  char type = ClassifyType(typestr, dummy, dummy, dummy);
2814  type = ModType(mod, type, dummy, dummy, dummy, dummy, dummy, dummy);
2815
2816  switch (type) {
2817    case 'c':
2818      return 7;
2819    case 'h':
2820    case 's':
2821      return 15;
2822    case 'f':
2823    case 'i':
2824      return 31;
2825    case 'd':
2826    case 'l':
2827      return 63;
2828    default:
2829      PrintFatalError("unhandled type!");
2830  }
2831}
2832
2833/// Generate the ARM and AArch64 intrinsic range checking code for
2834/// shift/lane immediates, checking for unique declarations.
2835void
2836NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2837                                        StringMap<ClassKind> &A64IntrinsicMap,
2838                                        bool isA64RangeCheck) {
2839  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2840  StringMap<OpKind> EmittedMap;
2841
2842  // Generate the intrinsic range checking code for shift/lane immediates.
2843  if (isA64RangeCheck)
2844    OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
2845  else
2846    OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2847
2848  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
2849    Record *R = RV[i];
2850
2851    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
2852    if (k != OpNone)
2853      continue;
2854
2855    std::string name = R->getValueAsString("Name");
2856    std::string Proto = R->getValueAsString("Prototype");
2857    std::string Types = R->getValueAsString("Types");
2858    std::string Rename = name + "@" + Proto;
2859
2860    // Functions with 'a' (the splat code) in the type prototype should not get
2861    // their own builtin as they use the non-splat variant.
2862    if (Proto.find('a') != std::string::npos)
2863      continue;
2864
2865    // Functions which do not have an immediate do not need to have range
2866    // checking code emitted.
2867    size_t immPos = Proto.find('i');
2868    if (immPos == std::string::npos)
2869      continue;
2870
2871    SmallVector<StringRef, 16> TypeVec;
2872    ParseTypes(R, Types, TypeVec);
2873
2874    if (R->getSuperClasses().size() < 2)
2875      PrintFatalError(R->getLoc(), "Builtin has no class kind");
2876
2877    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
2878    if (!ProtoHasScalar(Proto))
2879      ck = ClassB;
2880
2881    // Do not include AArch64 range checks if not generating code for AArch64.
2882    bool isA64 = R->getValueAsBit("isA64");
2883    if (!isA64RangeCheck && isA64)
2884      continue;
2885
2886    // Include ARM range checks in AArch64 but only if ARM intrinsics are not
2887    // redefined by AArch64 to handle new types.
2888    if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
2889      ClassKind &A64CK = A64IntrinsicMap[Rename];
2890      if (A64CK == ck && ck != ClassNone)
2891        continue;
2892    }
2893
2894    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
2895      std::string namestr, shiftstr, rangestr;
2896
2897      if (R->getValueAsBit("isVCVT_N")) {
2898        // VCVT between floating- and fixed-point values takes an immediate
2899        // in the range [1, 32] for f32, or [1, 64] for f64.
2900        ck = ClassB;
2901        if (name.find("32") != std::string::npos)
2902          rangestr = "l = 1; u = 31"; // upper bound = l + u
2903        else if (name.find("64") != std::string::npos)
2904          rangestr = "l = 1; u = 63";
2905        else
2906          PrintFatalError(R->getLoc(),
2907              "Fixed point convert name should contains \"32\" or \"64\"");
2908
2909      } else if (R->getValueAsBit("isScalarShift")) {
2910        // Right shifts have an 'r' in the name, left shifts do not.  Convert
2911        // instructions have the same bounds and right shifts.
2912        if (name.find('r') != std::string::npos ||
2913            name.find("cvt") != std::string::npos)
2914          rangestr = "l = 1; ";
2915
2916        unsigned upBound = RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]);
2917        // Narrow shift has half the upper bound
2918        if (R->getValueAsBit("isScalarNarrowShift"))
2919          upBound /= 2;
2920
2921        rangestr += "u = " + utostr(upBound);
2922      } else if (R->getValueAsBit("isShift")) {
2923        // Builtins which are overloaded by type will need to have their upper
2924        // bound computed at Sema time based on the type constant.
2925        shiftstr = ", true";
2926
2927        // Right shifts have an 'r' in the name, left shifts do not.
2928        if (name.find('r') != std::string::npos)
2929          rangestr = "l = 1; ";
2930
2931        rangestr += "u = RFT(TV" + shiftstr + ")";
2932      } else {
2933        // The immediate generally refers to a lane in the preceding argument.
2934        assert(immPos > 0 && "unexpected immediate operand");
2935        rangestr =
2936            "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
2937      }
2938      // Make sure cases appear only once by uniquing them in a string map.
2939      namestr = MangleName(name, TypeVec[ti], ck);
2940      if (EmittedMap.count(namestr))
2941        continue;
2942      EmittedMap[namestr] = OpNone;
2943
2944      // Calculate the index of the immediate that should be range checked.
2945      unsigned immidx = 0;
2946
2947      // Builtins that return a struct of multiple vectors have an extra
2948      // leading arg for the struct return.
2949      if (IsMultiVecProto(Proto[0]))
2950        ++immidx;
2951
2952      // Add one to the index for each argument until we reach the immediate
2953      // to be checked.  Structs of vectors are passed as multiple arguments.
2954      for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
2955        switch (Proto[ii]) {
2956        default:
2957          immidx += 1;
2958          break;
2959        case '2':
2960        case 'B':
2961          immidx += 2;
2962          break;
2963        case '3':
2964        case 'C':
2965          immidx += 3;
2966          break;
2967        case '4':
2968        case 'D':
2969          immidx += 4;
2970          break;
2971        case 'i':
2972          ie = ii + 1;
2973          break;
2974        }
2975      }
2976      if (isA64RangeCheck)
2977        OS << "case AArch64::BI__builtin_neon_";
2978      else
2979        OS << "case ARM::BI__builtin_neon_";
2980      OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
2981         << rangestr << "; break;\n";
2982    }
2983  }
2984  OS << "#endif\n\n";
2985}
2986
/// Generate the ARM and AArch64 overloaded type checking code for
/// SemaChecking.cpp, checking for unique builtin declarations.
///
/// For each overloadable builtin this emits a
/// "case <builtin-id>: mask = <bits>ULL ...; break;" line.  The mask is a
/// bitset with one bit per accepted element type (bit position given by
/// GetNeonEnum); separate cases are emitted for the 64-bit and 128-bit
/// (quad) variants of the builtin.  When \p isA64TypeCheck is true the
/// AArch64 section is emitted, otherwise the ARM section.
void
NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
                                      StringMap<ClassKind> &A64IntrinsicMap,
                                      bool isA64TypeCheck) {
  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  StringMap<OpKind> EmittedMap;

  // Generate the overloaded type checking code for SemaChecking.cpp
  if (isA64TypeCheck)
    OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
  else
    OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";

  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
    Record *R = RV[i];
    // Only plain builtins (OpNone) are type-checked; expanded operations
    // never reach Sema as builtin calls.
    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
    if (k != OpNone)
      continue;

    std::string Proto = R->getValueAsString("Prototype");
    std::string Types = R->getValueAsString("Types");
    std::string name = R->getValueAsString("Name");
    std::string Rename = name + "@" + Proto;

    // Functions with 'a' (the splat code) in the type prototype should not get
    // their own builtin as they use the non-splat variant.
    if (Proto.find('a') != std::string::npos)
      continue;

    // Functions which have a scalar argument cannot be overloaded, no need to
    // check them if we are emitting the type checking code.
    if (ProtoHasScalar(Proto))
      continue;

    SmallVector<StringRef, 16> TypeVec;
    ParseTypes(R, Types, TypeVec);

    if (R->getSuperClasses().size() < 2)
      PrintFatalError(R->getLoc(), "Builtin has no class kind");

    // Do not include AArch64 type checks if not generating code for AArch64.
    bool isA64 = R->getValueAsBit("isA64");
    if (!isA64TypeCheck && isA64)
      continue;

    // Include ARM  type check in AArch64 but only if ARM intrinsics
    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
    // redefined in AArch64 to handle an additional 2 x f64 type.
    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
    if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(Rename)) {
      ClassKind &A64CK = A64IntrinsicMap[Rename];
      if (A64CK == ck && ck != ClassNone)
        continue;
    }

    // si/qi remember the index of some non-quad/quad type in TypeVec so the
    // corresponding mangled (ClassB) name can be emitted below; mask/qmask
    // accumulate one bit per accepted element type.
    int si = -1, qi = -1;
    uint64_t mask = 0, qmask = 0;
    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
      // Generate the switch case(s) for this builtin for the type validation.
      bool quad = false, poly = false, usgn = false;
      (void) ClassifyType(TypeVec[ti], quad, poly, usgn);

      if (quad) {
        qi = ti;
        qmask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      } else {
        si = ti;
        mask |= 1ULL << GetNeonEnum(Proto, TypeVec[ti]);
      }
    }

    // Check if the builtin function has a pointer or const pointer argument.
    // ('c' = const pointer, 'p' = pointer, judging by the flags set here —
    // only the first such argument is recorded.)
    int PtrArgNum = -1;
    bool HasConstPtr = false;
    for (unsigned arg = 1, arge = Proto.size(); arg != arge; ++arg) {
      char ArgType = Proto[arg];
      if (ArgType == 'c') {
        HasConstPtr = true;
        PtrArgNum = arg - 1;
        break;
      }
      if (ArgType == 'p') {
        PtrArgNum = arg - 1;
        break;
      }
    }
    // For sret builtins, adjust the pointer argument index.
    if (PtrArgNum >= 0 && IsMultiVecProto(Proto[0]))
      PtrArgNum += 1;

    // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
    // and vst1_lane intrinsics.  Using a pointer to the vector element
    // type with one of those operations causes codegen to select an aligned
    // load/store instruction.  If you want an unaligned operation,
    // the pointer argument needs to have less alignment than element type,
    // so just accept any pointer type.
    if (name == "vld1_lane" || name == "vld1_dup" || name == "vst1_lane") {
      PtrArgNum = -1;
      HasConstPtr = false;
    }

    // Emit a case for the 64-bit variant if any non-quad type was seen.
    if (mask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
         << "0x" << utohexstr(mask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
    // Emit a case for the 128-bit (quad) variant if any quad type was seen.
    if (qmask) {
      if (isA64TypeCheck)
        OS << "case AArch64::BI__builtin_neon_";
      else
        OS << "case ARM::BI__builtin_neon_";
      OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
         << "0x" << utohexstr(qmask) << "ULL";
      if (PtrArgNum >= 0)
        OS << "; PtrArgNum = " << PtrArgNum;
      if (HasConstPtr)
        OS << "; HasConstPtr = true";
      OS << "; break;\n";
    }
  }
  OS << "#endif\n\n";
}
3119
3120/// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
3121/// declaration of builtins, checking for unique builtin declarations.
3122void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
3123                                 StringMap<ClassKind> &A64IntrinsicMap,
3124                                 bool isA64GenBuiltinDef) {
3125  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3126  StringMap<OpKind> EmittedMap;
3127
3128  // Generate BuiltinsARM.def and BuiltinsAArch64.def
3129  if (isA64GenBuiltinDef)
3130    OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
3131  else
3132    OS << "#ifdef GET_NEON_BUILTINS\n";
3133
3134  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3135    Record *R = RV[i];
3136    OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
3137    if (k != OpNone)
3138      continue;
3139
3140    std::string Proto = R->getValueAsString("Prototype");
3141    std::string name = R->getValueAsString("Name");
3142    std::string Rename = name + "@" + Proto;
3143
3144    // Functions with 'a' (the splat code) in the type prototype should not get
3145    // their own builtin as they use the non-splat variant.
3146    if (Proto.find('a') != std::string::npos)
3147      continue;
3148
3149    std::string Types = R->getValueAsString("Types");
3150    SmallVector<StringRef, 16> TypeVec;
3151    ParseTypes(R, Types, TypeVec);
3152
3153    if (R->getSuperClasses().size() < 2)
3154      PrintFatalError(R->getLoc(), "Builtin has no class kind");
3155
3156    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3157
3158    // Do not include AArch64 BUILTIN() macros if not generating
3159    // code for AArch64
3160    bool isA64 = R->getValueAsBit("isA64");
3161    if (!isA64GenBuiltinDef && isA64)
3162      continue;
3163
3164    // Include ARM  BUILTIN() macros  in AArch64 but only if ARM intrinsics
3165    // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr
3166    // redefined in AArch64 to handle an additional 2 x f64 type.
3167    if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(Rename)) {
3168      ClassKind &A64CK = A64IntrinsicMap[Rename];
3169      if (A64CK == ck && ck != ClassNone)
3170        continue;
3171    }
3172
3173    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3174      // Generate the declaration for this builtin, ensuring
3175      // that each unique BUILTIN() macro appears only once in the output
3176      // stream.
3177      std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
3178      if (EmittedMap.count(bd))
3179        continue;
3180
3181      EmittedMap[bd] = OpNone;
3182      OS << bd << "\n";
3183    }
3184  }
3185  OS << "#endif\n\n";
3186}
3187
3188/// runHeader - Emit a file with sections defining:
3189/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
3190/// 2. the SemaChecking code for the type overload checking.
3191/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
3192void NeonEmitter::runHeader(raw_ostream &OS) {
3193  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3194
3195  // build a map of AArch64 intriniscs to be used in uniqueness checks.
3196  StringMap<ClassKind> A64IntrinsicMap;
3197  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3198    Record *R = RV[i];
3199
3200    bool isA64 = R->getValueAsBit("isA64");
3201    if (!isA64)
3202      continue;
3203
3204    ClassKind CK = ClassNone;
3205    if (R->getSuperClasses().size() >= 2)
3206      CK = ClassMap[R->getSuperClasses()[1]];
3207
3208    std::string Name = R->getValueAsString("Name");
3209    std::string Proto = R->getValueAsString("Prototype");
3210    std::string Rename = Name + "@" + Proto;
3211    if (A64IntrinsicMap.count(Rename))
3212      continue;
3213    A64IntrinsicMap[Rename] = CK;
3214  }
3215
3216  // Generate BuiltinsARM.def for ARM
3217  genBuiltinsDef(OS, A64IntrinsicMap, false);
3218
3219  // Generate BuiltinsAArch64.def for AArch64
3220  genBuiltinsDef(OS, A64IntrinsicMap, true);
3221
3222  // Generate ARM overloaded type checking code for SemaChecking.cpp
3223  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
3224
3225  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
3226  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
3227
3228  // Generate ARM range checking code for shift/lane immediates.
3229  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
3230
3231  // Generate the AArch64 range checking code for shift/lane immediates.
3232  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
3233}
3234
3235/// GenTest - Write out a test for the intrinsic specified by the name and
3236/// type strings, including the embedded patterns for FileCheck to match.
3237static std::string GenTest(const std::string &name,
3238                           const std::string &proto,
3239                           StringRef outTypeStr, StringRef inTypeStr,
3240                           bool isShift, bool isHiddenLOp,
3241                           ClassKind ck, const std::string &InstName,
3242                           bool isA64,
3243                           std::string & testFuncProto) {
3244  assert(!proto.empty() && "");
3245  std::string s;
3246
3247  // Function name with type suffix
3248  std::string mangledName = MangleName(name, outTypeStr, ClassS);
3249  if (outTypeStr != inTypeStr) {
3250    // If the input type is different (e.g., for vreinterpret), append a suffix
3251    // for the input type.  String off a "Q" (quad) prefix so that MangleName
3252    // does not insert another "q" in the name.
3253    unsigned typeStrOff = (inTypeStr[0] == 'Q' ? 1 : 0);
3254    StringRef inTypeNoQuad = inTypeStr.substr(typeStrOff);
3255    mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
3256  }
3257
3258  // todo: GenerateChecksForIntrinsic does not generate CHECK
3259  // for aarch64 instructions yet
3260  std::vector<std::string> FileCheckPatterns;
3261  if (!isA64) {
3262	GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
3263							   isHiddenLOp, FileCheckPatterns);
3264	s+= "// CHECK_ARM: test_" + mangledName + "\n";
3265  }
3266  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
3267
3268  // Emit the FileCheck patterns.
3269  // If for any reason we do not want to emit a check, mangledInst
3270  // will be the empty string.
3271  if (FileCheckPatterns.size()) {
3272    for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
3273                                                  e = FileCheckPatterns.end();
3274         i != e;
3275         ++i) {
3276      s += "// CHECK_ARM: " + *i + "\n";
3277    }
3278  }
3279
3280  // Emit the start of the test function.
3281
3282  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
3283  char arg = 'a';
3284  std::string comma;
3285  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3286    // Do not create arguments for values that must be immediate constants.
3287    if (proto[i] == 'i')
3288      continue;
3289    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
3290    testFuncProto.push_back(arg);
3291    comma = ", ";
3292  }
3293  testFuncProto += ")";
3294
3295  s+= testFuncProto;
3296  s+= " {\n  ";
3297
3298  if (proto[0] != 'v')
3299    s += "return ";
3300  s += mangledName + "(";
3301  arg = 'a';
3302  for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
3303    if (proto[i] == 'i') {
3304      // For immediate operands, test the maximum value.
3305      if (isShift)
3306        s += "1"; // FIXME
3307      else
3308        // The immediate generally refers to a lane in the preceding argument.
3309        s += utostr(RangeFromType(proto[i-1], inTypeStr));
3310    } else {
3311      s.push_back(arg);
3312    }
3313    if ((i + 1) < e)
3314      s += ", ";
3315  }
3316  s += ");\n}\n\n";
3317  return s;
3318}
3319
3320/// Write out all intrinsic tests for the specified target, checking
3321/// for intrinsic test uniqueness.
3322void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
3323                                bool isA64GenTest) {
3324  if (isA64GenTest)
3325	OS << "#ifdef __aarch64__\n";
3326
3327  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
3328  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
3329    Record *R = RV[i];
3330    std::string name = R->getValueAsString("Name");
3331    std::string Proto = R->getValueAsString("Prototype");
3332    std::string Types = R->getValueAsString("Types");
3333    bool isShift = R->getValueAsBit("isShift");
3334    std::string InstName = R->getValueAsString("InstName");
3335    bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
3336    bool isA64 = R->getValueAsBit("isA64");
3337
3338    // do not include AArch64 intrinsic test if not generating
3339    // code for AArch64
3340    if (!isA64GenTest && isA64)
3341      continue;
3342
3343    SmallVector<StringRef, 16> TypeVec;
3344    ParseTypes(R, Types, TypeVec);
3345
3346    ClassKind ck = ClassMap[R->getSuperClasses()[1]];
3347    OpKind kind = OpMap[R->getValueAsDef("Operand")->getName()];
3348    if (kind == OpUnavailable)
3349      continue;
3350    for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
3351      if (kind == OpReinterpret) {
3352        bool outQuad = false;
3353        bool dummy = false;
3354        (void)ClassifyType(TypeVec[ti], outQuad, dummy, dummy);
3355        for (unsigned srcti = 0, srcte = TypeVec.size();
3356             srcti != srcte; ++srcti) {
3357          bool inQuad = false;
3358          (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
3359          if (srcti == ti || inQuad != outQuad)
3360            continue;
3361		  std::string testFuncProto;
3362          std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
3363                                  isShift, isHiddenLOp, ck, InstName, isA64,
3364								  testFuncProto);
3365          if (EmittedMap.count(testFuncProto))
3366            continue;
3367          EmittedMap[testFuncProto] = kind;
3368          OS << s << "\n";
3369        }
3370      } else {
3371		std::string testFuncProto;
3372        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
3373                                isHiddenLOp, ck, InstName, isA64, testFuncProto);
3374        if (EmittedMap.count(testFuncProto))
3375          continue;
3376        EmittedMap[testFuncProto] = kind;
3377        OS << s << "\n";
3378      }
3379    }
3380  }
3381
3382  if (isA64GenTest)
3383	OS << "#endif\n";
3384}
3385/// runTests - Write out a complete set of tests for all of the Neon
3386/// intrinsics.
3387void NeonEmitter::runTests(raw_ostream &OS) {
3388  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
3389        "apcs-gnu\\\n"
3390        "// RUN:  -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
3391        "// RUN:  | FileCheck %s -check-prefix=CHECK_ARM\n"
3392		"\n"
3393	    "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
3394	    "// RUN -target-feature +neon  -ffreestanding -S -o - %s \\\n"
3395	    "// RUN:  | FileCheck %s -check-prefix=CHECK_AARCH64\n"
3396        "\n"
3397        "// REQUIRES: long_tests\n"
3398        "\n"
3399        "#include <arm_neon.h>\n"
3400        "\n";
3401
3402  // ARM tests must be emitted before AArch64 tests to ensure
3403  // tests for intrinsics that are common to ARM and AArch64
3404  // appear only once in the output stream.
3405  // The check for uniqueness is done in genTargetTest.
3406  StringMap<OpKind> EmittedMap;
3407
3408  genTargetTest(OS, EmittedMap, false);
3409
3410  genTargetTest(OS, EmittedMap, true);
3411}
3412
namespace clang {
// TableGen entry points called from clang's tblgen driver.

// Emit arm_neon.h.
void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).run(OS);
}
// Emit the header sections: BuiltinsARM.def/BuiltinsAArch64.def fragments
// plus the SemaChecking overload and immediate-range checks.
void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runHeader(OS);
}
// Emit a test file exercising every Neon intrinsic.
void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  NeonEmitter(Records).runTests(OS);
}
} // End namespace clang
3424