1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/StringSwitch.h"
18#include "llvm/BinaryFormat/Dwarf.h"
19#include "llvm/IR/AttributeMask.h"
20#include "llvm/IR/Constants.h"
21#include "llvm/IR/DebugInfo.h"
22#include "llvm/IR/DebugInfoMetadata.h"
23#include "llvm/IR/DiagnosticInfo.h"
24#include "llvm/IR/Function.h"
25#include "llvm/IR/IRBuilder.h"
26#include "llvm/IR/InstVisitor.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/IntrinsicInst.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/IntrinsicsARM.h"
32#include "llvm/IR/IntrinsicsNVPTX.h"
33#include "llvm/IR/IntrinsicsRISCV.h"
34#include "llvm/IR/IntrinsicsWebAssembly.h"
35#include "llvm/IR/IntrinsicsX86.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
39#include "llvm/IR/Verifier.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/Regex.h"
43#include "llvm/TargetParser/Triple.h"
44#include <cstring>
45
46using namespace llvm;
47
48static cl::opt<bool>
49    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
50                                cl::desc("Disable autoupgrade of debug info"));
51
52static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
53
54// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55// changed their type from v4f32 to v2i64.
56static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
57                                  Function *&NewFn) {
58  // Check whether this is an old version of the function, which received
59  // v4f32 arguments.
60  Type *Arg0Type = F->getFunctionType()->getParamType(0);
61  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
62    return false;
63
64  // Yes, it's old, replace it with new version.
65  rename(F);
66  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67  return true;
68}
69
70// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71// arguments have changed their type from i32 to i8.
72static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
73                                             Function *&NewFn) {
74  // Check that the last argument is an i32.
75  Type *LastArgType = F->getFunctionType()->getParamType(
76     F->getFunctionType()->getNumParams() - 1);
77  if (!LastArgType->isIntegerTy(32))
78    return false;
79
80  // Move this function aside and map down.
81  rename(F);
82  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83  return true;
84}
85
86// Upgrade the declaration of fp compare intrinsics that change return type
87// from scalar to vXi1 mask.
88static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
89                                      Function *&NewFn) {
90  // Check if the return type is a vector.
91  if (F->getReturnType()->isVectorTy())
92    return false;
93
94  rename(F);
95  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
96  return true;
97}
98
99static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
100                                    Function *&NewFn) {
101  if (F->getReturnType()->getScalarType()->isBFloatTy())
102    return false;
103
104  rename(F);
105  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
106  return true;
107}
108
109static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
110                                      Function *&NewFn) {
111  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
112    return false;
113
114  rename(F);
115  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
116  return true;
117}
118
119static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
120  // All of the intrinsics matches below should be marked with which llvm
121  // version started autoupgrading them. At some point in the future we would
122  // like to use this information to remove upgrade code for some older
123  // intrinsics. It is currently undecided how we will determine that future
124  // point.
125  if (Name.consume_front("avx."))
126    return (Name.starts_with("blend.p") ||        // Added in 3.7
127            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
128            Name == "cvtdq2.pd.256" ||            // Added in 3.9
129            Name == "cvtdq2.ps.256" ||            // Added in 7.0
130            Name.starts_with("movnt.") ||         // Added in 3.2
131            Name.starts_with("sqrt.p") ||         // Added in 7.0
132            Name.starts_with("storeu.") ||        // Added in 3.9
133            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
134            Name.starts_with("vbroadcastf128") || // Added in 4.0
135            Name.starts_with("vextractf128.") ||  // Added in 3.7
136            Name.starts_with("vinsertf128.") ||   // Added in 3.7
137            Name.starts_with("vperm2f128.") ||    // Added in 6.0
138            Name.starts_with("vpermil."));        // Added in 3.1
139
140  if (Name.consume_front("avx2."))
141    return (Name == "movntdqa" ||             // Added in 5.0
142            Name.starts_with("pabs.") ||      // Added in 6.0
143            Name.starts_with("padds.") ||     // Added in 8.0
144            Name.starts_with("paddus.") ||    // Added in 8.0
145            Name.starts_with("pblendd.") ||   // Added in 3.7
146            Name == "pblendw" ||              // Added in 3.7
147            Name.starts_with("pbroadcast") || // Added in 3.8
148            Name.starts_with("pcmpeq.") ||    // Added in 3.1
149            Name.starts_with("pcmpgt.") ||    // Added in 3.1
150            Name.starts_with("pmax") ||       // Added in 3.9
151            Name.starts_with("pmin") ||       // Added in 3.9
152            Name.starts_with("pmovsx") ||     // Added in 3.9
153            Name.starts_with("pmovzx") ||     // Added in 3.9
154            Name == "pmul.dq" ||              // Added in 7.0
155            Name == "pmulu.dq" ||             // Added in 7.0
156            Name.starts_with("psll.dq") ||    // Added in 3.7
157            Name.starts_with("psrl.dq") ||    // Added in 3.7
158            Name.starts_with("psubs.") ||     // Added in 8.0
159            Name.starts_with("psubus.") ||    // Added in 8.0
160            Name.starts_with("vbroadcast") || // Added in 3.8
161            Name == "vbroadcasti128" ||       // Added in 3.7
162            Name == "vextracti128" ||         // Added in 3.7
163            Name == "vinserti128" ||          // Added in 3.7
164            Name == "vperm2i128");            // Added in 6.0
165
166  if (Name.consume_front("avx512.")) {
167    if (Name.consume_front("mask."))
168      // 'avx512.mask.*'
169      return (Name.starts_with("add.p") ||       // Added in 7.0. 128/256 in 4.0
170              Name.starts_with("and.") ||        // Added in 3.9
171              Name.starts_with("andn.") ||       // Added in 3.9
172              Name.starts_with("broadcast.s") || // Added in 3.9
173              Name.starts_with("broadcastf32x4.") || // Added in 6.0
174              Name.starts_with("broadcastf32x8.") || // Added in 6.0
175              Name.starts_with("broadcastf64x2.") || // Added in 6.0
176              Name.starts_with("broadcastf64x4.") || // Added in 6.0
177              Name.starts_with("broadcasti32x4.") || // Added in 6.0
178              Name.starts_with("broadcasti32x8.") || // Added in 6.0
179              Name.starts_with("broadcasti64x2.") || // Added in 6.0
180              Name.starts_with("broadcasti64x4.") || // Added in 6.0
181              Name.starts_with("cmp.b") ||           // Added in 5.0
182              Name.starts_with("cmp.d") ||           // Added in 5.0
183              Name.starts_with("cmp.q") ||           // Added in 5.0
184              Name.starts_with("cmp.w") ||           // Added in 5.0
185              Name.starts_with("compress.b") ||      // Added in 9.0
186              Name.starts_with("compress.d") ||      // Added in 9.0
187              Name.starts_with("compress.p") ||      // Added in 9.0
188              Name.starts_with("compress.q") ||      // Added in 9.0
189              Name.starts_with("compress.store.") || // Added in 7.0
190              Name.starts_with("compress.w") ||      // Added in 9.0
191              Name.starts_with("conflict.") ||       // Added in 9.0
192              Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
193              Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
194              Name == "cvtpd2dq.256" ||              // Added in 7.0
195              Name == "cvtpd2ps.256" ||              // Added in 7.0
196              Name == "cvtps2pd.128" ||              // Added in 7.0
197              Name == "cvtps2pd.256" ||              // Added in 7.0
198              Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
199              Name == "cvtqq2ps.256" ||              // Added in 9.0
200              Name == "cvtqq2ps.512" ||              // Added in 9.0
201              Name == "cvttpd2dq.256" ||             // Added in 7.0
202              Name == "cvttps2dq.128" ||             // Added in 7.0
203              Name == "cvttps2dq.256" ||             // Added in 7.0
204              Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
205              Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
206              Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
207              Name == "cvtuqq2ps.256" ||             // Added in 9.0
208              Name == "cvtuqq2ps.512" ||             // Added in 9.0
209              Name.starts_with("dbpsadbw.") ||       // Added in 7.0
210              Name.starts_with("div.p") ||    // Added in 7.0. 128/256 in 4.0
211              Name.starts_with("expand.b") || // Added in 9.0
212              Name.starts_with("expand.d") || // Added in 9.0
213              Name.starts_with("expand.load.") || // Added in 7.0
214              Name.starts_with("expand.p") ||     // Added in 9.0
215              Name.starts_with("expand.q") ||     // Added in 9.0
216              Name.starts_with("expand.w") ||     // Added in 9.0
217              Name.starts_with("fpclass.p") ||    // Added in 7.0
218              Name.starts_with("insert") ||       // Added in 4.0
219              Name.starts_with("load.") ||        // Added in 3.9
220              Name.starts_with("loadu.") ||       // Added in 3.9
221              Name.starts_with("lzcnt.") ||       // Added in 5.0
222              Name.starts_with("max.p") ||       // Added in 7.0. 128/256 in 5.0
223              Name.starts_with("min.p") ||       // Added in 7.0. 128/256 in 5.0
224              Name.starts_with("movddup") ||     // Added in 3.9
225              Name.starts_with("move.s") ||      // Added in 4.0
226              Name.starts_with("movshdup") ||    // Added in 3.9
227              Name.starts_with("movsldup") ||    // Added in 3.9
228              Name.starts_with("mul.p") ||       // Added in 7.0. 128/256 in 4.0
229              Name.starts_with("or.") ||         // Added in 3.9
230              Name.starts_with("pabs.") ||       // Added in 6.0
231              Name.starts_with("packssdw.") ||   // Added in 5.0
232              Name.starts_with("packsswb.") ||   // Added in 5.0
233              Name.starts_with("packusdw.") ||   // Added in 5.0
234              Name.starts_with("packuswb.") ||   // Added in 5.0
235              Name.starts_with("padd.") ||       // Added in 4.0
236              Name.starts_with("padds.") ||      // Added in 8.0
237              Name.starts_with("paddus.") ||     // Added in 8.0
238              Name.starts_with("palignr.") ||    // Added in 3.9
239              Name.starts_with("pand.") ||       // Added in 3.9
240              Name.starts_with("pandn.") ||      // Added in 3.9
241              Name.starts_with("pavg") ||        // Added in 6.0
242              Name.starts_with("pbroadcast") ||  // Added in 6.0
243              Name.starts_with("pcmpeq.") ||     // Added in 3.9
244              Name.starts_with("pcmpgt.") ||     // Added in 3.9
245              Name.starts_with("perm.df.") ||    // Added in 3.9
246              Name.starts_with("perm.di.") ||    // Added in 3.9
247              Name.starts_with("permvar.") ||    // Added in 7.0
248              Name.starts_with("pmaddubs.w.") || // Added in 7.0
249              Name.starts_with("pmaddw.d.") ||   // Added in 7.0
250              Name.starts_with("pmax") ||        // Added in 4.0
251              Name.starts_with("pmin") ||        // Added in 4.0
252              Name == "pmov.qd.256" ||           // Added in 9.0
253              Name == "pmov.qd.512" ||           // Added in 9.0
254              Name == "pmov.wb.256" ||           // Added in 9.0
255              Name == "pmov.wb.512" ||           // Added in 9.0
256              Name.starts_with("pmovsx") ||      // Added in 4.0
257              Name.starts_with("pmovzx") ||      // Added in 4.0
258              Name.starts_with("pmul.dq.") ||    // Added in 4.0
259              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
260              Name.starts_with("pmulh.w.") ||    // Added in 7.0
261              Name.starts_with("pmulhu.w.") ||   // Added in 7.0
262              Name.starts_with("pmull.") ||      // Added in 4.0
263              Name.starts_with("pmultishift.qb.") || // Added in 8.0
264              Name.starts_with("pmulu.dq.") ||       // Added in 4.0
265              Name.starts_with("por.") ||            // Added in 3.9
266              Name.starts_with("prol.") ||           // Added in 8.0
267              Name.starts_with("prolv.") ||          // Added in 8.0
268              Name.starts_with("pror.") ||           // Added in 8.0
269              Name.starts_with("prorv.") ||          // Added in 8.0
270              Name.starts_with("pshuf.b.") ||        // Added in 4.0
271              Name.starts_with("pshuf.d.") ||        // Added in 3.9
272              Name.starts_with("pshufh.w.") ||       // Added in 3.9
273              Name.starts_with("pshufl.w.") ||       // Added in 3.9
274              Name.starts_with("psll.d") ||          // Added in 4.0
275              Name.starts_with("psll.q") ||          // Added in 4.0
276              Name.starts_with("psll.w") ||          // Added in 4.0
277              Name.starts_with("pslli") ||           // Added in 4.0
278              Name.starts_with("psllv") ||           // Added in 4.0
279              Name.starts_with("psra.d") ||          // Added in 4.0
280              Name.starts_with("psra.q") ||          // Added in 4.0
281              Name.starts_with("psra.w") ||          // Added in 4.0
282              Name.starts_with("psrai") ||           // Added in 4.0
283              Name.starts_with("psrav") ||           // Added in 4.0
284              Name.starts_with("psrl.d") ||          // Added in 4.0
285              Name.starts_with("psrl.q") ||          // Added in 4.0
286              Name.starts_with("psrl.w") ||          // Added in 4.0
287              Name.starts_with("psrli") ||           // Added in 4.0
288              Name.starts_with("psrlv") ||           // Added in 4.0
289              Name.starts_with("psub.") ||           // Added in 4.0
290              Name.starts_with("psubs.") ||          // Added in 8.0
291              Name.starts_with("psubus.") ||         // Added in 8.0
292              Name.starts_with("pternlog.") ||       // Added in 7.0
293              Name.starts_with("punpckh") ||         // Added in 3.9
294              Name.starts_with("punpckl") ||         // Added in 3.9
295              Name.starts_with("pxor.") ||           // Added in 3.9
296              Name.starts_with("shuf.f") ||          // Added in 6.0
297              Name.starts_with("shuf.i") ||          // Added in 6.0
298              Name.starts_with("shuf.p") ||          // Added in 4.0
299              Name.starts_with("sqrt.p") ||          // Added in 7.0
300              Name.starts_with("store.b.") ||        // Added in 3.9
301              Name.starts_with("store.d.") ||        // Added in 3.9
302              Name.starts_with("store.p") ||         // Added in 3.9
303              Name.starts_with("store.q.") ||        // Added in 3.9
304              Name.starts_with("store.w.") ||        // Added in 3.9
305              Name == "store.ss" ||                  // Added in 7.0
306              Name.starts_with("storeu.") ||         // Added in 3.9
307              Name.starts_with("sub.p") ||       // Added in 7.0. 128/256 in 4.0
308              Name.starts_with("ucmp.") ||       // Added in 5.0
309              Name.starts_with("unpckh.") ||     // Added in 3.9
310              Name.starts_with("unpckl.") ||     // Added in 3.9
311              Name.starts_with("valign.") ||     // Added in 4.0
312              Name == "vcvtph2ps.128" ||         // Added in 11.0
313              Name == "vcvtph2ps.256" ||         // Added in 11.0
314              Name.starts_with("vextract") ||    // Added in 4.0
315              Name.starts_with("vfmadd.") ||     // Added in 7.0
316              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
317              Name.starts_with("vfnmadd.") ||    // Added in 7.0
318              Name.starts_with("vfnmsub.") ||    // Added in 7.0
319              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
320              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
321              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
322              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
323              Name.starts_with("vpermi2var.") || // Added in 7.0
324              Name.starts_with("vpermil.p") ||   // Added in 3.9
325              Name.starts_with("vpermilvar.") || // Added in 4.0
326              Name.starts_with("vpermt2var.") || // Added in 7.0
327              Name.starts_with("vpmadd52") ||    // Added in 7.0
328              Name.starts_with("vpshld.") ||     // Added in 7.0
329              Name.starts_with("vpshldv.") ||    // Added in 8.0
330              Name.starts_with("vpshrd.") ||     // Added in 7.0
331              Name.starts_with("vpshrdv.") ||    // Added in 8.0
332              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
333              Name.starts_with("xor."));           // Added in 3.9
334
335    if (Name.consume_front("mask3."))
336      // 'avx512.mask3.*'
337      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
338              Name.starts_with("vfmaddsub.") || // Added in 7.0
339              Name.starts_with("vfmsub.") ||    // Added in 7.0
340              Name.starts_with("vfmsubadd.") || // Added in 7.0
341              Name.starts_with("vfnmsub."));    // Added in 7.0
342
343    if (Name.consume_front("maskz."))
344      // 'avx512.maskz.*'
345      return (Name.starts_with("pternlog.") ||   // Added in 7.0
346              Name.starts_with("vfmadd.") ||     // Added in 7.0
347              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
348              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
349              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
350              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
351              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
352              Name.starts_with("vpermt2var.") || // Added in 7.0
353              Name.starts_with("vpmadd52") ||    // Added in 7.0
354              Name.starts_with("vpshldv.") ||    // Added in 8.0
355              Name.starts_with("vpshrdv."));     // Added in 8.0
356
357    // 'avx512.*'
358    return (Name == "movntdqa" ||               // Added in 5.0
359            Name == "pmul.dq.512" ||            // Added in 7.0
360            Name == "pmulu.dq.512" ||           // Added in 7.0
361            Name.starts_with("broadcastm") ||   // Added in 6.0
362            Name.starts_with("cmp.p") ||        // Added in 12.0
363            Name.starts_with("cvtb2mask.") ||   // Added in 7.0
364            Name.starts_with("cvtd2mask.") ||   // Added in 7.0
365            Name.starts_with("cvtmask2") ||     // Added in 5.0
366            Name.starts_with("cvtq2mask.") ||   // Added in 7.0
367            Name == "cvtusi2sd" ||              // Added in 7.0
368            Name.starts_with("cvtw2mask.") ||   // Added in 7.0
369            Name == "kand.w" ||                 // Added in 7.0
370            Name == "kandn.w" ||                // Added in 7.0
371            Name == "knot.w" ||                 // Added in 7.0
372            Name == "kor.w" ||                  // Added in 7.0
373            Name == "kortestc.w" ||             // Added in 7.0
374            Name == "kortestz.w" ||             // Added in 7.0
375            Name.starts_with("kunpck") ||       // added in 6.0
376            Name == "kxnor.w" ||                // Added in 7.0
377            Name == "kxor.w" ||                 // Added in 7.0
378            Name.starts_with("padds.") ||       // Added in 8.0
379            Name.starts_with("pbroadcast") ||   // Added in 3.9
380            Name.starts_with("prol") ||         // Added in 8.0
381            Name.starts_with("pror") ||         // Added in 8.0
382            Name.starts_with("psll.dq") ||      // Added in 3.9
383            Name.starts_with("psrl.dq") ||      // Added in 3.9
384            Name.starts_with("psubs.") ||       // Added in 8.0
385            Name.starts_with("ptestm") ||       // Added in 6.0
386            Name.starts_with("ptestnm") ||      // Added in 6.0
387            Name.starts_with("storent.") ||     // Added in 3.9
388            Name.starts_with("vbroadcast.s") || // Added in 7.0
389            Name.starts_with("vpshld.") ||      // Added in 8.0
390            Name.starts_with("vpshrd."));       // Added in 8.0
391  }
392
393  if (Name.consume_front("fma."))
394    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
395            Name.starts_with("vfmsub.") ||    // Added in 7.0
396            Name.starts_with("vfmsubadd.") || // Added in 7.0
397            Name.starts_with("vfnmadd.") ||   // Added in 7.0
398            Name.starts_with("vfnmsub."));    // Added in 7.0
399
400  if (Name.consume_front("fma4."))
401    return Name.starts_with("vfmadd.s"); // Added in 7.0
402
403  if (Name.consume_front("sse."))
404    return (Name == "add.ss" ||            // Added in 4.0
405            Name == "cvtsi2ss" ||          // Added in 7.0
406            Name == "cvtsi642ss" ||        // Added in 7.0
407            Name == "div.ss" ||            // Added in 4.0
408            Name == "mul.ss" ||            // Added in 4.0
409            Name.starts_with("sqrt.p") ||  // Added in 7.0
410            Name == "sqrt.ss" ||           // Added in 7.0
411            Name.starts_with("storeu.") || // Added in 3.9
412            Name == "sub.ss");             // Added in 4.0
413
414  if (Name.consume_front("sse2."))
415    return (Name == "add.sd" ||            // Added in 4.0
416            Name == "cvtdq2pd" ||          // Added in 3.9
417            Name == "cvtdq2ps" ||          // Added in 7.0
418            Name == "cvtps2pd" ||          // Added in 3.9
419            Name == "cvtsi2sd" ||          // Added in 7.0
420            Name == "cvtsi642sd" ||        // Added in 7.0
421            Name == "cvtss2sd" ||          // Added in 7.0
422            Name == "div.sd" ||            // Added in 4.0
423            Name == "mul.sd" ||            // Added in 4.0
424            Name.starts_with("padds.") ||  // Added in 8.0
425            Name.starts_with("paddus.") || // Added in 8.0
426            Name.starts_with("pcmpeq.") || // Added in 3.1
427            Name.starts_with("pcmpgt.") || // Added in 3.1
428            Name == "pmaxs.w" ||           // Added in 3.9
429            Name == "pmaxu.b" ||           // Added in 3.9
430            Name == "pmins.w" ||           // Added in 3.9
431            Name == "pminu.b" ||           // Added in 3.9
432            Name == "pmulu.dq" ||          // Added in 7.0
433            Name.starts_with("pshuf") ||   // Added in 3.9
434            Name.starts_with("psll.dq") || // Added in 3.7
435            Name.starts_with("psrl.dq") || // Added in 3.7
436            Name.starts_with("psubs.") ||  // Added in 8.0
437            Name.starts_with("psubus.") || // Added in 8.0
438            Name.starts_with("sqrt.p") ||  // Added in 7.0
439            Name == "sqrt.sd" ||           // Added in 7.0
440            Name == "storel.dq" ||         // Added in 3.9
441            Name.starts_with("storeu.") || // Added in 3.9
442            Name == "sub.sd");             // Added in 4.0
443
444  if (Name.consume_front("sse41."))
445    return (Name.starts_with("blendp") || // Added in 3.7
446            Name == "movntdqa" ||         // Added in 5.0
447            Name == "pblendw" ||          // Added in 3.7
448            Name == "pmaxsb" ||           // Added in 3.9
449            Name == "pmaxsd" ||           // Added in 3.9
450            Name == "pmaxud" ||           // Added in 3.9
451            Name == "pmaxuw" ||           // Added in 3.9
452            Name == "pminsb" ||           // Added in 3.9
453            Name == "pminsd" ||           // Added in 3.9
454            Name == "pminud" ||           // Added in 3.9
455            Name == "pminuw" ||           // Added in 3.9
456            Name.starts_with("pmovsx") || // Added in 3.8
457            Name.starts_with("pmovzx") || // Added in 3.9
458            Name == "pmuldq");            // Added in 7.0
459
460  if (Name.consume_front("sse42."))
461    return Name == "crc32.64.8"; // Added in 3.4
462
463  if (Name.consume_front("sse4a."))
464    return Name.starts_with("movnt."); // Added in 3.9
465
466  if (Name.consume_front("ssse3."))
467    return (Name == "pabs.b.128" || // Added in 6.0
468            Name == "pabs.d.128" || // Added in 6.0
469            Name == "pabs.w.128");  // Added in 6.0
470
471  if (Name.consume_front("xop."))
472    return (Name == "vpcmov" ||          // Added in 3.8
473            Name == "vpcmov.256" ||      // Added in 5.0
474            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
475            Name.starts_with("vprot"));  // Added in 8.0
476
477  return (Name == "addcarry.u32" ||        // Added in 8.0
478          Name == "addcarry.u64" ||        // Added in 8.0
479          Name == "addcarryx.u32" ||       // Added in 8.0
480          Name == "addcarryx.u64" ||       // Added in 8.0
481          Name == "subborrow.u32" ||       // Added in 8.0
482          Name == "subborrow.u64" ||       // Added in 8.0
483          Name.starts_with("vcvtph2ps.")); // Added in 11.0
484}
485
486static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
487                                        Function *&NewFn) {
488  // Only handle intrinsics that start with "x86.".
489  if (!Name.consume_front("x86."))
490    return false;
491
492  if (shouldUpgradeX86Intrinsic(F, Name)) {
493    NewFn = nullptr;
494    return true;
495  }
496
497  if (Name == "rdtscp") { // Added in 8.0
498    // If this intrinsic has 0 operands, it's the new version.
499    if (F->getFunctionType()->getNumParams() == 0)
500      return false;
501
502    rename(F);
503    NewFn = Intrinsic::getDeclaration(F->getParent(),
504                                      Intrinsic::x86_rdtscp);
505    return true;
506  }
507
508  Intrinsic::ID ID;
509
510  // SSE4.1 ptest functions may have an old signature.
511  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
512    ID = StringSwitch<Intrinsic::ID>(Name)
513             .Case("c", Intrinsic::x86_sse41_ptestc)
514             .Case("z", Intrinsic::x86_sse41_ptestz)
515             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
516             .Default(Intrinsic::not_intrinsic);
517    if (ID != Intrinsic::not_intrinsic)
518      return upgradePTESTIntrinsic(F, ID, NewFn);
519
520    return false;
521  }
522
523  // Several blend and other instructions with masks used the wrong number of
524  // bits.
525
526  // Added in 3.6
527  ID = StringSwitch<Intrinsic::ID>(Name)
528           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
529           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
530           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
531           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
532           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
533           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
534           .Default(Intrinsic::not_intrinsic);
535  if (ID != Intrinsic::not_intrinsic)
536    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
537
538  if (Name.consume_front("avx512.mask.cmp.")) {
539    // Added in 7.0
540    ID = StringSwitch<Intrinsic::ID>(Name)
541             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
542             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
543             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
544             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
545             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
546             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
547             .Default(Intrinsic::not_intrinsic);
548    if (ID != Intrinsic::not_intrinsic)
549      return upgradeX86MaskedFPCompare(F, ID, NewFn);
550    return false; // No other 'x86.avx523.mask.cmp.*'.
551  }
552
553  if (Name.consume_front("avx512bf16.")) {
554    // Added in 9.0
555    ID = StringSwitch<Intrinsic::ID>(Name)
556             .Case("cvtne2ps2bf16.128",
557                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
558             .Case("cvtne2ps2bf16.256",
559                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
560             .Case("cvtne2ps2bf16.512",
561                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
562             .Case("mask.cvtneps2bf16.128",
563                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
564             .Case("cvtneps2bf16.256",
565                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
566             .Case("cvtneps2bf16.512",
567                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
568             .Default(Intrinsic::not_intrinsic);
569    if (ID != Intrinsic::not_intrinsic)
570      return upgradeX86BF16Intrinsic(F, ID, NewFn);
571
572    // Added in 9.0
573    ID = StringSwitch<Intrinsic::ID>(Name)
574             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
575             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
576             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
577             .Default(Intrinsic::not_intrinsic);
578    if (ID != Intrinsic::not_intrinsic)
579      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
580    return false; // No other 'x86.avx512bf16.*'.
581  }
582
583  if (Name.consume_front("xop.")) {
584    Intrinsic::ID ID = Intrinsic::not_intrinsic;
585    if (Name.starts_with("vpermil2")) { // Added in 3.9
586      // Upgrade any XOP PERMIL2 index operand still using a float/double
587      // vector.
588      auto Idx = F->getFunctionType()->getParamType(2);
589      if (Idx->isFPOrFPVectorTy()) {
590        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
591        unsigned EltSize = Idx->getScalarSizeInBits();
592        if (EltSize == 64 && IdxSize == 128)
593          ID = Intrinsic::x86_xop_vpermil2pd;
594        else if (EltSize == 32 && IdxSize == 128)
595          ID = Intrinsic::x86_xop_vpermil2ps;
596        else if (EltSize == 64 && IdxSize == 256)
597          ID = Intrinsic::x86_xop_vpermil2pd_256;
598        else
599          ID = Intrinsic::x86_xop_vpermil2ps_256;
600      }
601    } else if (F->arg_size() == 2)
602      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
603      ID = StringSwitch<Intrinsic::ID>(Name)
604               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
605               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
606               .Default(Intrinsic::not_intrinsic);
607
608    if (ID != Intrinsic::not_intrinsic) {
609      rename(F);
610      NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
611      return true;
612    }
613    return false; // No other 'x86.xop.*'
614  }
615
616  if (Name == "seh.recoverfp") {
617    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
618    return true;
619  }
620
621  return false;
622}
623
624// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
625// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
626static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
627                                                 StringRef Name,
628                                                 Function *&NewFn) {
629  if (Name.starts_with("rbit")) {
630    // '(arm|aarch64).rbit'.
631    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
632                                      F->arg_begin()->getType());
633    return true;
634  }
635
636  if (Name == "thread.pointer") {
637    // '(arm|aarch64).thread.pointer'.
638    NewFn =
639        Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
640    return true;
641  }
642
643  bool Neon = Name.consume_front("neon.");
644  if (Neon) {
645    // '(arm|aarch64).neon.*'.
646    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
647    // v16i8 respectively.
648    if (Name.consume_front("bfdot.")) {
649      // (arm|aarch64).neon.bfdot.*'.
650      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
651                             .Cases("v2f32.v8i8", "v4f32.v16i8",
652                                    IsArm ? Intrinsic::arm_neon_bfdot
653                                          : Intrinsic::aarch64_neon_bfdot)
654                             .Default(Intrinsic::not_intrinsic);
655      if (ID != Intrinsic::not_intrinsic) {
656        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
657        assert((OperandWidth == 64 || OperandWidth == 128) &&
658               "Unexpected operand width");
659        LLVMContext &Ctx = F->getParent()->getContext();
660        std::array<Type *, 2> Tys{
661            {F->getReturnType(),
662             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
663        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
664        return true;
665      }
666      return false; // No other '(arm|aarch64).neon.bfdot.*'.
667    }
668
669    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
670    // anymore and accept v8bf16 instead of v16i8.
671    if (Name.consume_front("bfm")) {
672      // (arm|aarch64).neon.bfm*'.
673      if (Name.consume_back(".v4f32.v16i8")) {
674        // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
675        Intrinsic::ID ID =
676            StringSwitch<Intrinsic::ID>(Name)
677                .Case("mla", IsArm ? Intrinsic::arm_neon_bfmmla
678                                   : Intrinsic::aarch64_neon_bfmmla)
679                .Case("lalb", IsArm ? Intrinsic::arm_neon_bfmlalb
680                                    : Intrinsic::aarch64_neon_bfmlalb)
681                .Case("lalt", IsArm ? Intrinsic::arm_neon_bfmlalt
682                                    : Intrinsic::aarch64_neon_bfmlalt)
683                .Default(Intrinsic::not_intrinsic);
684        if (ID != Intrinsic::not_intrinsic) {
685          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
686          return true;
687        }
688        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
689      }
690      return false; // No other '(arm|aarch64).neon.bfm*.
691    }
692    // Continue on to Aarch64 Neon or Arm Neon.
693  }
694  // Continue on to Arm or Aarch64.
695
696  if (IsArm) {
697    // 'arm.*'.
698    if (Neon) {
699      // 'arm.neon.*'.
700      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
701                             .StartsWith("vclz.", Intrinsic::ctlz)
702                             .StartsWith("vcnt.", Intrinsic::ctpop)
703                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
704                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
705                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
706                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
707                             .Default(Intrinsic::not_intrinsic);
708      if (ID != Intrinsic::not_intrinsic) {
709        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
710                                          F->arg_begin()->getType());
711        return true;
712      }
713
714      if (Name.consume_front("vst")) {
715        // 'arm.neon.vst*'.
716        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
717        SmallVector<StringRef, 2> Groups;
718        if (vstRegex.match(Name, &Groups)) {
719          static const Intrinsic::ID StoreInts[] = {
720              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
721              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
722
723          static const Intrinsic::ID StoreLaneInts[] = {
724              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
725              Intrinsic::arm_neon_vst4lane};
726
727          auto fArgs = F->getFunctionType()->params();
728          Type *Tys[] = {fArgs[0], fArgs[1]};
729          if (Groups[1].size() == 1)
730            NewFn = Intrinsic::getDeclaration(F->getParent(),
731                                              StoreInts[fArgs.size() - 3], Tys);
732          else
733            NewFn = Intrinsic::getDeclaration(
734                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
735          return true;
736        }
737        return false; // No other 'arm.neon.vst*'.
738      }
739
740      return false; // No other 'arm.neon.*'.
741    }
742
743    if (Name.consume_front("mve.")) {
744      // 'arm.mve.*'.
745      if (Name == "vctp64") {
746        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
747          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
748          // the function and deal with it below in UpgradeIntrinsicCall.
749          rename(F);
750          return true;
751        }
752        return false; // Not 'arm.mve.vctp64'.
753      }
754
755      // These too are changed to accept a v2i1 instead of the old v4i1.
756      if (Name.consume_back(".v4i1")) {
757        // 'arm.mve.*.v4i1'.
758        if (Name.consume_back(".predicated.v2i64.v4i32"))
759          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
760          return Name == "mull.int" || Name == "vqdmull";
761
762        if (Name.consume_back(".v2i64")) {
763          // 'arm.mve.*.v2i64.v4i1'
764          bool IsGather = Name.consume_front("vldr.gather.");
765          if (IsGather || Name.consume_front("vstr.scatter.")) {
766            if (Name.consume_front("base.")) {
767              // Optional 'wb.' prefix.
768              Name.consume_front("wb.");
769              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
770              // predicated.v2i64.v2i64.v4i1'.
771              return Name == "predicated.v2i64";
772            }
773
774            if (Name.consume_front("offset.predicated."))
775              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
776                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
777
778            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
779            return false;
780          }
781
782          return false; // No other 'arm.mve.*.v2i64.v4i1'.
783        }
784        return false; // No other 'arm.mve.*.v4i1'.
785      }
786      return false; // No other 'arm.mve.*'.
787    }
788
789    if (Name.consume_front("cde.vcx")) {
790      // 'arm.cde.vcx*'.
791      if (Name.consume_back(".predicated.v2i64.v4i1"))
792        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
793        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
794               Name == "3q" || Name == "3qa";
795
796      return false; // No other 'arm.cde.vcx*'.
797    }
798  } else {
799    // 'aarch64.*'.
800    if (Neon) {
801      // 'aarch64.neon.*'.
802      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
803                             .StartsWith("frintn", Intrinsic::roundeven)
804                             .StartsWith("rbit", Intrinsic::bitreverse)
805                             .Default(Intrinsic::not_intrinsic);
806      if (ID != Intrinsic::not_intrinsic) {
807        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
808                                          F->arg_begin()->getType());
809        return true;
810      }
811
812      if (Name.starts_with("addp")) {
813        // 'aarch64.neon.addp*'.
814        if (F->arg_size() != 2)
815          return false; // Invalid IR.
816        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
817        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
818          NewFn = Intrinsic::getDeclaration(F->getParent(),
819                                            Intrinsic::aarch64_neon_faddp, Ty);
820          return true;
821        }
822      }
823      return false; // No other 'aarch64.neon.*'.
824    }
825    if (Name.consume_front("sve.")) {
826      // 'aarch64.sve.*'.
827      if (Name.consume_front("bf")) {
828        if (Name.consume_back(".lane")) {
829          // 'aarch64.sve.bf*.lane'.
830          Intrinsic::ID ID =
831              StringSwitch<Intrinsic::ID>(Name)
832                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
833                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
834                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
835                  .Default(Intrinsic::not_intrinsic);
836          if (ID != Intrinsic::not_intrinsic) {
837            NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
838            return true;
839          }
840          return false; // No other 'aarch64.sve.bf*.lane'.
841        }
842        return false; // No other 'aarch64.sve.bf*'.
843      }
844
845      if (Name.consume_front("ld")) {
846        // 'aarch64.sve.ld*'.
847        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
848        if (LdRegex.match(Name)) {
849          Type *ScalarTy =
850              dyn_cast<VectorType>(F->getReturnType())->getElementType();
851          ElementCount EC = dyn_cast<VectorType>(F->arg_begin()->getType())
852                                ->getElementCount();
853          Type *Ty = VectorType::get(ScalarTy, EC);
854          static const Intrinsic::ID LoadIDs[] = {
855              Intrinsic::aarch64_sve_ld2_sret,
856              Intrinsic::aarch64_sve_ld3_sret,
857              Intrinsic::aarch64_sve_ld4_sret,
858          };
859          NewFn = Intrinsic::getDeclaration(F->getParent(),
860                                            LoadIDs[Name[0] - '2'], Ty);
861          return true;
862        }
863        return false; // No other 'aarch64.sve.ld*'.
864      }
865
866      if (Name.consume_front("tuple.")) {
867        // 'aarch64.sve.tuple.*'.
868        if (Name.starts_with("get")) {
869          // 'aarch64.sve.tuple.get*'.
870          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
871          NewFn = Intrinsic::getDeclaration(F->getParent(),
872                                            Intrinsic::vector_extract, Tys);
873          return true;
874        }
875
876        if (Name.starts_with("set")) {
877          // 'aarch64.sve.tuple.set*'.
878          auto Args = F->getFunctionType()->params();
879          Type *Tys[] = {Args[0], Args[2], Args[1]};
880          NewFn = Intrinsic::getDeclaration(F->getParent(),
881                                            Intrinsic::vector_insert, Tys);
882          return true;
883        }
884
885        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
886        if (CreateTupleRegex.match(Name)) {
887          // 'aarch64.sve.tuple.create*'.
888          auto Args = F->getFunctionType()->params();
889          Type *Tys[] = {F->getReturnType(), Args[1]};
890          NewFn = Intrinsic::getDeclaration(F->getParent(),
891                                            Intrinsic::vector_insert, Tys);
892          return true;
893        }
894        return false; // No other 'aarch64.sve.tuple.*'.
895      }
896      return false; // No other 'aarch64.sve.*'.
897    }
898  }
899  return false; // No other 'arm.*', 'aarch64.*'.
900}
901
902static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
903  if (Name.consume_front("abs."))
904    return StringSwitch<Intrinsic::ID>(Name)
905        .Case("bf16", Intrinsic::nvvm_abs_bf16)
906        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
907        .Default(Intrinsic::not_intrinsic);
908
909  if (Name.consume_front("fma.rn."))
910    return StringSwitch<Intrinsic::ID>(Name)
911        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
912        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
913        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
914        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
915        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
916        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
917        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
918        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
919        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
920        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
921        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
922        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
923        .Default(Intrinsic::not_intrinsic);
924
925  if (Name.consume_front("fmax."))
926    return StringSwitch<Intrinsic::ID>(Name)
927        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
928        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
929        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
930        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
931        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
932        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
933        .Case("ftz.nan.xorsign.abs.bf16",
934              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
935        .Case("ftz.nan.xorsign.abs.bf16x2",
936              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
937        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
938        .Case("ftz.xorsign.abs.bf16x2",
939              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
940        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
941        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
942        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
943        .Case("nan.xorsign.abs.bf16x2",
944              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
945        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
946        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
947        .Default(Intrinsic::not_intrinsic);
948
949  if (Name.consume_front("fmin."))
950    return StringSwitch<Intrinsic::ID>(Name)
951        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
952        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
953        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
954        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
955        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
956        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
957        .Case("ftz.nan.xorsign.abs.bf16",
958              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
959        .Case("ftz.nan.xorsign.abs.bf16x2",
960              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
961        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
962        .Case("ftz.xorsign.abs.bf16x2",
963              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
964        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
965        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
966        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
967        .Case("nan.xorsign.abs.bf16x2",
968              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
969        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
970        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
971        .Default(Intrinsic::not_intrinsic);
972
973  if (Name.consume_front("neg."))
974    return StringSwitch<Intrinsic::ID>(Name)
975        .Case("bf16", Intrinsic::nvvm_neg_bf16)
976        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
977        .Default(Intrinsic::not_intrinsic);
978
979  return Intrinsic::not_intrinsic;
980}
981
982static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
983  assert(F && "Illegal to upgrade a non-existent Function.");
984
985  StringRef Name = F->getName();
986
987  // Quickly eliminate it, if it's not a candidate.
988  if (!Name.consume_front("llvm.") || Name.empty())
989    return false;
990
991  switch (Name[0]) {
992  default: break;
993  case 'a': {
994    bool IsArm = Name.consume_front("arm.");
995    if (IsArm || Name.consume_front("aarch64.")) {
996      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
997        return true;
998      break;
999    }
1000
1001    if (Name.consume_front("amdgcn.")) {
1002      if (Name == "alignbit") {
1003        // Target specific intrinsic became redundant
1004        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
1005                                          {F->getReturnType()});
1006        return true;
1007      }
1008
1009      if (Name.consume_front("atomic.")) {
1010        if (Name.starts_with("inc") || Name.starts_with("dec")) {
1011          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1012          // there's no new declaration.
1013          NewFn = nullptr;
1014          return true;
1015        }
1016        break; // No other 'amdgcn.atomic.*'
1017      }
1018
1019      if (Name.starts_with("ldexp.")) {
1020        // Target specific intrinsic became redundant
1021        NewFn = Intrinsic::getDeclaration(
1022          F->getParent(), Intrinsic::ldexp,
1023          {F->getReturnType(), F->getArg(1)->getType()});
1024        return true;
1025      }
1026      break; // No other 'amdgcn.*'
1027    }
1028
1029    break;
1030  }
1031  case 'c': {
1032    if (F->arg_size() == 1) {
1033      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1034                             .StartsWith("ctlz.", Intrinsic::ctlz)
1035                             .StartsWith("cttz.", Intrinsic::cttz)
1036                             .Default(Intrinsic::not_intrinsic);
1037      if (ID != Intrinsic::not_intrinsic) {
1038        rename(F);
1039        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
1040                                          F->arg_begin()->getType());
1041        return true;
1042      }
1043    }
1044
1045    if (F->arg_size() == 2 && Name.equals("coro.end")) {
1046      rename(F);
1047      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
1048      return true;
1049    }
1050
1051    break;
1052  }
1053  case 'd':
1054    if (Name.consume_front("dbg.")) {
1055      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1056        rename(F);
1057        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
1058        return true;
1059      }
1060      break; // No other 'dbg.*'.
1061    }
1062    break;
1063  case 'e':
1064    if (Name.consume_front("experimental.vector.")) {
1065      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1066                             .StartsWith("extract.", Intrinsic::vector_extract)
1067                             .StartsWith("insert.", Intrinsic::vector_insert)
1068                             .Default(Intrinsic::not_intrinsic);
1069      if (ID != Intrinsic::not_intrinsic) {
1070        const auto *FT = F->getFunctionType();
1071        SmallVector<Type *, 2> Tys;
1072        if (ID == Intrinsic::vector_extract)
1073          // Extracting overloads the return type.
1074          Tys.push_back(FT->getReturnType());
1075        Tys.push_back(FT->getParamType(0));
1076        if (ID == Intrinsic::vector_insert)
1077          // Inserting overloads the inserted type.
1078          Tys.push_back(FT->getParamType(1));
1079        rename(F);
1080        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1081        return true;
1082      }
1083
1084      if (Name.consume_front("reduce.")) {
1085        SmallVector<StringRef, 2> Groups;
1086        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1087        if (R.match(Name, &Groups))
1088          ID = StringSwitch<Intrinsic::ID>(Groups[1])
1089                   .Case("add", Intrinsic::vector_reduce_add)
1090                   .Case("mul", Intrinsic::vector_reduce_mul)
1091                   .Case("and", Intrinsic::vector_reduce_and)
1092                   .Case("or", Intrinsic::vector_reduce_or)
1093                   .Case("xor", Intrinsic::vector_reduce_xor)
1094                   .Case("smax", Intrinsic::vector_reduce_smax)
1095                   .Case("smin", Intrinsic::vector_reduce_smin)
1096                   .Case("umax", Intrinsic::vector_reduce_umax)
1097                   .Case("umin", Intrinsic::vector_reduce_umin)
1098                   .Case("fmax", Intrinsic::vector_reduce_fmax)
1099                   .Case("fmin", Intrinsic::vector_reduce_fmin)
1100                   .Default(Intrinsic::not_intrinsic);
1101
1102        bool V2 = false;
1103        if (ID == Intrinsic::not_intrinsic) {
1104          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1105          Groups.clear();
1106          V2 = true;
1107          if (R2.match(Name, &Groups))
1108            ID = StringSwitch<Intrinsic::ID>(Groups[1])
1109                     .Case("fadd", Intrinsic::vector_reduce_fadd)
1110                     .Case("fmul", Intrinsic::vector_reduce_fmul)
1111                     .Default(Intrinsic::not_intrinsic);
1112        }
1113        if (ID != Intrinsic::not_intrinsic) {
1114          rename(F);
1115          auto Args = F->getFunctionType()->params();
1116          NewFn =
1117              Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
1118          return true;
1119        }
1120        break; // No other 'expermental.vector.reduce.*'.
1121      }
1122      break; // No other 'experimental.vector.*'.
1123    }
1124    break; // No other 'e*'.
1125  case 'f':
1126    if (Name.starts_with("flt.rounds")) {
1127      rename(F);
1128      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
1129      return true;
1130    }
1131    break;
1132  case 'i':
1133    if (Name.starts_with("invariant.group.barrier")) {
1134      // Rename invariant.group.barrier to launder.invariant.group
1135      auto Args = F->getFunctionType()->params();
1136      Type* ObjectPtr[1] = {Args[0]};
1137      rename(F);
1138      NewFn = Intrinsic::getDeclaration(F->getParent(),
1139          Intrinsic::launder_invariant_group, ObjectPtr);
1140      return true;
1141    }
1142    break;
1143  case 'm': {
1144    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1145    // alignment parameter to embedding the alignment as an attribute of
1146    // the pointer args.
1147    if (unsigned ID = StringSwitch<unsigned>(Name)
1148                          .StartsWith("memcpy.", Intrinsic::memcpy)
1149                          .StartsWith("memmove.", Intrinsic::memmove)
1150                          .Default(0)) {
1151      if (F->arg_size() == 5) {
1152        rename(F);
1153        // Get the types of dest, src, and len
1154        ArrayRef<Type *> ParamTypes =
1155            F->getFunctionType()->params().slice(0, 3);
1156        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
1157        return true;
1158      }
1159    }
1160    if (Name.starts_with("memset.") && F->arg_size() == 5) {
1161      rename(F);
1162      // Get the types of dest, and len
1163      const auto *FT = F->getFunctionType();
1164      Type *ParamTypes[2] = {
1165          FT->getParamType(0), // Dest
1166          FT->getParamType(2)  // len
1167      };
1168      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1169                                        ParamTypes);
1170      return true;
1171    }
1172    break;
1173  }
1174  case 'n': {
1175    if (Name.consume_front("nvvm.")) {
1176      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1177      if (F->arg_size() == 1) {
1178        Intrinsic::ID IID =
1179            StringSwitch<Intrinsic::ID>(Name)
1180                .Cases("brev32", "brev64", Intrinsic::bitreverse)
1181                .Case("clz.i", Intrinsic::ctlz)
1182                .Case("popc.i", Intrinsic::ctpop)
1183                .Default(Intrinsic::not_intrinsic);
1184        if (IID != Intrinsic::not_intrinsic) {
1185          NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1186                                            {F->getReturnType()});
1187          return true;
1188        }
1189      }
1190
1191      // Check for nvvm intrinsics that need a return type adjustment.
1192      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1193        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1194        if (IID != Intrinsic::not_intrinsic) {
1195          NewFn = nullptr;
1196          return true;
1197        }
1198      }
1199
1200      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1201      // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
1202      //
1203      // TODO: We could add lohi.i2d.
1204      bool Expand = false;
1205      if (Name.consume_front("abs."))
1206        // nvvm.abs.{i,ii}
1207        Expand = Name == "i" || Name == "ll";
1208      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1209        Expand = true;
1210      else if (Name.consume_front("max.") || Name.consume_front("min."))
1211        // nvvm.{min,max}.{i,ii,ui,ull}
1212        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1213                 Name == "ui" || Name == "ull";
1214      else if (Name.consume_front("atomic.load.add."))
1215        // nvvm.atomic.load.add.{f32.p,f64.p}
1216        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1217      else
1218        Expand = false;
1219
1220      if (Expand) {
1221        NewFn = nullptr;
1222        return true;
1223      }
1224      break; // No other 'nvvm.*'.
1225    }
1226    break;
1227  }
1228  case 'o':
1229    // We only need to change the name to match the mangling including the
1230    // address space.
1231    if (Name.starts_with("objectsize.")) {
1232      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1233      if (F->arg_size() == 2 || F->arg_size() == 3 ||
1234          F->getName() !=
1235              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1236        rename(F);
1237        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1238                                          Tys);
1239        return true;
1240      }
1241    }
1242    break;
1243
1244  case 'p':
1245    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1246      rename(F);
1247      NewFn = Intrinsic::getDeclaration(
1248          F->getParent(), Intrinsic::ptr_annotation,
1249          {F->arg_begin()->getType(), F->getArg(1)->getType()});
1250      return true;
1251    }
1252    break;
1253
1254  case 'r': {
1255    if (Name.consume_front("riscv.")) {
1256      Intrinsic::ID ID;
1257      ID = StringSwitch<Intrinsic::ID>(Name)
1258               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1259               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1260               .Case("aes32esi", Intrinsic::riscv_aes32esi)
1261               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1262               .Default(Intrinsic::not_intrinsic);
1263      if (ID != Intrinsic::not_intrinsic) {
1264        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1265          rename(F);
1266          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1267          return true;
1268        }
1269        break; // No other applicable upgrades.
1270      }
1271
1272      ID = StringSwitch<Intrinsic::ID>(Name)
1273               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1274               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1275               .Default(Intrinsic::not_intrinsic);
1276      if (ID != Intrinsic::not_intrinsic) {
1277        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1278            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1279          rename(F);
1280          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1281          return true;
1282        }
1283        break; // No other applicable upgrades.
1284      }
1285
1286      ID = StringSwitch<Intrinsic::ID>(Name)
1287               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1288               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1289               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1290               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1291               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1292               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1293               .Default(Intrinsic::not_intrinsic);
1294      if (ID != Intrinsic::not_intrinsic) {
1295        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1296          rename(F);
1297          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1298          return true;
1299        }
1300        break; // No other applicable upgrades.
1301      }
1302      break; // No other 'riscv.*' intrinsics
1303    }
1304  } break;
1305
1306  case 's':
1307    if (Name == "stackprotectorcheck") {
1308      NewFn = nullptr;
1309      return true;
1310    }
1311    break;
1312
1313  case 'v': {
1314    if (Name == "var.annotation" && F->arg_size() == 4) {
1315      rename(F);
1316      NewFn = Intrinsic::getDeclaration(
1317          F->getParent(), Intrinsic::var_annotation,
1318          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1319      return true;
1320    }
1321    break;
1322  }
1323
1324  case 'w':
1325    if (Name.consume_front("wasm.")) {
1326      Intrinsic::ID ID =
1327          StringSwitch<Intrinsic::ID>(Name)
1328              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1329              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1330              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1331              .Default(Intrinsic::not_intrinsic);
1332      if (ID != Intrinsic::not_intrinsic) {
1333        rename(F);
1334        NewFn =
1335            Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
1336        return true;
1337      }
1338
1339      if (Name.consume_front("dot.i8x16.i7x16.")) {
1340        ID = StringSwitch<Intrinsic::ID>(Name)
1341                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1342                 .Case("add.signed",
1343                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1344                 .Default(Intrinsic::not_intrinsic);
1345        if (ID != Intrinsic::not_intrinsic) {
1346          rename(F);
1347          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1348          return true;
1349        }
1350        break; // No other 'wasm.dot.i8x16.i7x16.*'.
1351      }
1352      break; // No other 'wasm.*'.
1353    }
1354    break;
1355
1356  case 'x':
1357    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1358      return true;
1359  }
1360
1361  auto *ST = dyn_cast<StructType>(F->getReturnType());
1362  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1363      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1364    // Replace return type with literal non-packed struct. Only do this for
1365    // intrinsics declared to return a struct, not for intrinsics with
1366    // overloaded return type, in which case the exact struct type will be
1367    // mangled into the name.
1368    SmallVector<Intrinsic::IITDescriptor> Desc;
1369    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1370    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1371      auto *FT = F->getFunctionType();
1372      auto *NewST = StructType::get(ST->getContext(), ST->elements());
1373      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1374      std::string Name = F->getName().str();
1375      rename(F);
1376      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1377                               Name, F->getParent());
1378
1379      // The new function may also need remangling.
1380      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1381        NewFn = *Result;
1382      return true;
1383    }
1384  }
1385
1386  // Remangle our intrinsic since we upgrade the mangling
1387  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1388  if (Result != std::nullopt) {
1389    NewFn = *Result;
1390    return true;
1391  }
1392
1393  //  This may not belong here. This function is effectively being overloaded
1394  //  to both detect an intrinsic which needs upgrading, and to provide the
1395  //  upgraded form of the intrinsic. We should perhaps have two separate
1396  //  functions for this.
1397  return false;
1398}
1399
1400bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1401  NewFn = nullptr;
1402  bool Upgraded = upgradeIntrinsicFunction1(F, NewFn);
1403  assert(F != NewFn && "Intrinsic function upgraded to the same function");
1404
1405  // Upgrade intrinsic attributes.  This does not change the function.
1406  if (NewFn)
1407    F = NewFn;
1408  if (Intrinsic::ID id = F->getIntrinsicID())
1409    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1410  return Upgraded;
1411}
1412
1413GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1414  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1415                          GV->getName() == "llvm.global_dtors")) ||
1416      !GV->hasInitializer())
1417    return nullptr;
1418  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1419  if (!ATy)
1420    return nullptr;
1421  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1422  if (!STy || STy->getNumElements() != 2)
1423    return nullptr;
1424
1425  LLVMContext &C = GV->getContext();
1426  IRBuilder<> IRB(C);
1427  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1428                               IRB.getPtrTy());
1429  Constant *Init = GV->getInitializer();
1430  unsigned N = Init->getNumOperands();
1431  std::vector<Constant *> NewCtors(N);
1432  for (unsigned i = 0; i != N; ++i) {
1433    auto Ctor = cast<Constant>(Init->getOperand(i));
1434    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1435                                      Ctor->getAggregateElement(1),
1436                                      Constant::getNullValue(IRB.getPtrTy()));
1437  }
1438  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1439
1440  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1441                            NewInit, GV->getName());
1442}
1443
1444// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1445// to byte shuffles.
1446static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1447                                         unsigned Shift) {
1448  auto *ResultTy = cast<FixedVectorType>(Op->getType());
1449  unsigned NumElts = ResultTy->getNumElements() * 8;
1450
1451  // Bitcast from a 64-bit element type to a byte element type.
1452  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1453  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1454
1455  // We'll be shuffling in zeroes.
1456  Value *Res = Constant::getNullValue(VecTy);
1457
1458  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1459  // we'll just return the zero vector.
1460  if (Shift < 16) {
1461    int Idxs[64];
1462    // 256/512-bit version is split into 2/4 16-byte lanes.
1463    for (unsigned l = 0; l != NumElts; l += 16)
1464      for (unsigned i = 0; i != 16; ++i) {
1465        unsigned Idx = NumElts + i - Shift;
1466        if (Idx < NumElts)
1467          Idx -= NumElts - 16; // end of lane, switch operand.
1468        Idxs[l + i] = Idx + l;
1469      }
1470
1471    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1472  }
1473
1474  // Bitcast back to a 64-bit element type.
1475  return Builder.CreateBitCast(Res, ResultTy, "cast");
1476}
1477
1478// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1479// to byte shuffles.
1480static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1481                                         unsigned Shift) {
1482  auto *ResultTy = cast<FixedVectorType>(Op->getType());
1483  unsigned NumElts = ResultTy->getNumElements() * 8;
1484
1485  // Bitcast from a 64-bit element type to a byte element type.
1486  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1487  Op = Builder.CreateBitCast(Op, VecTy, "cast");
1488
1489  // We'll be shuffling in zeroes.
1490  Value *Res = Constant::getNullValue(VecTy);
1491
1492  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1493  // we'll just return the zero vector.
1494  if (Shift < 16) {
1495    int Idxs[64];
1496    // 256/512-bit version is split into 2/4 16-byte lanes.
1497    for (unsigned l = 0; l != NumElts; l += 16)
1498      for (unsigned i = 0; i != 16; ++i) {
1499        unsigned Idx = i + Shift;
1500        if (Idx >= 16)
1501          Idx += NumElts - 16; // end of lane, switch operand.
1502        Idxs[l + i] = Idx + l;
1503      }
1504
1505    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1506  }
1507
1508  // Bitcast back to a 64-bit element type.
1509  return Builder.CreateBitCast(Res, ResultTy, "cast");
1510}
1511
1512static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1513                            unsigned NumElts) {
1514  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1515  llvm::VectorType *MaskTy = FixedVectorType::get(
1516      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1517  Mask = Builder.CreateBitCast(Mask, MaskTy);
1518
1519  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1520  // i8 and we need to extract down to the right number of elements.
1521  if (NumElts <= 4) {
1522    int Indices[4];
1523    for (unsigned i = 0; i != NumElts; ++i)
1524      Indices[i] = i;
1525    Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1526                                       "extract");
1527  }
1528
1529  return Mask;
1530}
1531
1532static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1533                            Value *Op1) {
1534  // If the mask is all ones just emit the first operation.
1535  if (const auto *C = dyn_cast<Constant>(Mask))
1536    if (C->isAllOnesValue())
1537      return Op0;
1538
1539  Mask = getX86MaskVec(Builder, Mask,
1540                       cast<FixedVectorType>(Op0->getType())->getNumElements());
1541  return Builder.CreateSelect(Mask, Op0, Op1);
1542}
1543
1544static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1545                                  Value *Op1) {
1546  // If the mask is all ones just emit the first operation.
1547  if (const auto *C = dyn_cast<Constant>(Mask))
1548    if (C->isAllOnesValue())
1549      return Op0;
1550
1551  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1552                                      Mask->getType()->getIntegerBitWidth());
1553  Mask = Builder.CreateBitCast(Mask, MaskTy);
1554  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1555  return Builder.CreateSelect(Mask, Op0, Op1);
1556}
1557
1558// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1559// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1560// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1561static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1562                                        Value *Op1, Value *Shift,
1563                                        Value *Passthru, Value *Mask,
1564                                        bool IsVALIGN) {
1565  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1566
1567  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1568  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1569  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1570  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1571
1572  // Mask the immediate for VALIGN.
1573  if (IsVALIGN)
1574    ShiftVal &= (NumElts - 1);
1575
1576  // If palignr is shifting the pair of vectors more than the size of two
1577  // lanes, emit zero.
1578  if (ShiftVal >= 32)
1579    return llvm::Constant::getNullValue(Op0->getType());
1580
1581  // If palignr is shifting the pair of input vectors more than one lane,
1582  // but less than two lanes, convert to shifting in zeroes.
1583  if (ShiftVal > 16) {
1584    ShiftVal -= 16;
1585    Op1 = Op0;
1586    Op0 = llvm::Constant::getNullValue(Op0->getType());
1587  }
1588
1589  int Indices[64];
1590  // 256-bit palignr operates on 128-bit lanes so we need to handle that
1591  for (unsigned l = 0; l < NumElts; l += 16) {
1592    for (unsigned i = 0; i != 16; ++i) {
1593      unsigned Idx = ShiftVal + i;
1594      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1595        Idx += NumElts - 16; // End of lane, switch operand.
1596      Indices[l + i] = Idx + l;
1597    }
1598  }
1599
1600  Value *Align = Builder.CreateShuffleVector(
1601      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1602
1603  return emitX86Select(Builder, Mask, Align, Passthru);
1604}
1605
1606static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1607                                          bool ZeroMask, bool IndexForm) {
1608  Type *Ty = CI.getType();
1609  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1610  unsigned EltWidth = Ty->getScalarSizeInBits();
1611  bool IsFloat = Ty->isFPOrFPVectorTy();
1612  Intrinsic::ID IID;
1613  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1614    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1615  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1616    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1617  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1618    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1619  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1620    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1621  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1622    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1623  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1624    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1625  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1626    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1627  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1628    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1629  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1630    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1631  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1632    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1633  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1634    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1635  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1636    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1637  else if (VecWidth == 128 && EltWidth == 16)
1638    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1639  else if (VecWidth == 256 && EltWidth == 16)
1640    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1641  else if (VecWidth == 512 && EltWidth == 16)
1642    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1643  else if (VecWidth == 128 && EltWidth == 8)
1644    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1645  else if (VecWidth == 256 && EltWidth == 8)
1646    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1647  else if (VecWidth == 512 && EltWidth == 8)
1648    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1649  else
1650    llvm_unreachable("Unexpected intrinsic");
1651
1652  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1653                    CI.getArgOperand(2) };
1654
1655  // If this isn't index form we need to swap operand 0 and 1.
1656  if (!IndexForm)
1657    std::swap(Args[0], Args[1]);
1658
1659  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1660                                Args);
1661  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1662                             : Builder.CreateBitCast(CI.getArgOperand(1),
1663                                                     Ty);
1664  return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1665}
1666
1667static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1668                                         Intrinsic::ID IID) {
1669  Type *Ty = CI.getType();
1670  Value *Op0 = CI.getOperand(0);
1671  Value *Op1 = CI.getOperand(1);
1672  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1673  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1674
1675  if (CI.arg_size() == 4) { // For masked intrinsics.
1676    Value *VecSrc = CI.getOperand(2);
1677    Value *Mask = CI.getOperand(3);
1678    Res = emitX86Select(Builder, Mask, Res, VecSrc);
1679  }
1680  return Res;
1681}
1682
1683static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1684                               bool IsRotateRight) {
1685  Type *Ty = CI.getType();
1686  Value *Src = CI.getArgOperand(0);
1687  Value *Amt = CI.getArgOperand(1);
1688
1689  // Amount may be scalar immediate, in which case create a splat vector.
1690  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1691  // we only care about the lowest log2 bits anyway.
1692  if (Amt->getType() != Ty) {
1693    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1694    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1695    Amt = Builder.CreateVectorSplat(NumElts, Amt);
1696  }
1697
1698  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1699  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1700  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1701
1702  if (CI.arg_size() == 4) { // For masked intrinsics.
1703    Value *VecSrc = CI.getOperand(2);
1704    Value *Mask = CI.getOperand(3);
1705    Res = emitX86Select(Builder, Mask, Res, VecSrc);
1706  }
1707  return Res;
1708}
1709
1710static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1711                              bool IsSigned) {
1712  Type *Ty = CI.getType();
1713  Value *LHS = CI.getArgOperand(0);
1714  Value *RHS = CI.getArgOperand(1);
1715
1716  CmpInst::Predicate Pred;
1717  switch (Imm) {
1718  case 0x0:
1719    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1720    break;
1721  case 0x1:
1722    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1723    break;
1724  case 0x2:
1725    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1726    break;
1727  case 0x3:
1728    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1729    break;
1730  case 0x4:
1731    Pred = ICmpInst::ICMP_EQ;
1732    break;
1733  case 0x5:
1734    Pred = ICmpInst::ICMP_NE;
1735    break;
1736  case 0x6:
1737    return Constant::getNullValue(Ty); // FALSE
1738  case 0x7:
1739    return Constant::getAllOnesValue(Ty); // TRUE
1740  default:
1741    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1742  }
1743
1744  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1745  Value *Ext = Builder.CreateSExt(Cmp, Ty);
1746  return Ext;
1747}
1748
1749static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1750                                    bool IsShiftRight, bool ZeroMask) {
1751  Type *Ty = CI.getType();
1752  Value *Op0 = CI.getArgOperand(0);
1753  Value *Op1 = CI.getArgOperand(1);
1754  Value *Amt = CI.getArgOperand(2);
1755
1756  if (IsShiftRight)
1757    std::swap(Op0, Op1);
1758
1759  // Amount may be scalar immediate, in which case create a splat vector.
1760  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1761  // we only care about the lowest log2 bits anyway.
1762  if (Amt->getType() != Ty) {
1763    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1764    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1765    Amt = Builder.CreateVectorSplat(NumElts, Amt);
1766  }
1767
1768  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1769  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1770  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1771
1772  unsigned NumArgs = CI.arg_size();
1773  if (NumArgs >= 4) { // For masked intrinsics.
1774    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1775                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1776                                   CI.getArgOperand(0);
1777    Value *Mask = CI.getOperand(NumArgs - 1);
1778    Res = emitX86Select(Builder, Mask, Res, VecSrc);
1779  }
1780  return Res;
1781}
1782
1783static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1784                                 Value *Mask, bool Aligned) {
1785  // Cast the pointer to the right type.
1786  Ptr = Builder.CreateBitCast(Ptr,
1787                              llvm::PointerType::getUnqual(Data->getType()));
1788  const Align Alignment =
1789      Aligned
1790          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1791          : Align(1);
1792
1793  // If the mask is all ones just emit a regular store.
1794  if (const auto *C = dyn_cast<Constant>(Mask))
1795    if (C->isAllOnesValue())
1796      return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1797
1798  // Convert the mask from an integer type to a vector of i1.
1799  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1800  Mask = getX86MaskVec(Builder, Mask, NumElts);
1801  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1802}
1803
1804static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1805                                Value *Passthru, Value *Mask, bool Aligned) {
1806  Type *ValTy = Passthru->getType();
1807  // Cast the pointer to the right type.
1808  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1809  const Align Alignment =
1810      Aligned
1811          ? Align(
1812                Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1813                8)
1814          : Align(1);
1815
1816  // If the mask is all ones just emit a regular store.
1817  if (const auto *C = dyn_cast<Constant>(Mask))
1818    if (C->isAllOnesValue())
1819      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1820
1821  // Convert the mask from an integer type to a vector of i1.
1822  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1823  Mask = getX86MaskVec(Builder, Mask, NumElts);
1824  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1825}
1826
1827static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1828  Type *Ty = CI.getType();
1829  Value *Op0 = CI.getArgOperand(0);
1830  Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1831  Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1832  if (CI.arg_size() == 3)
1833    Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1834  return Res;
1835}
1836
1837static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1838  Type *Ty = CI.getType();
1839
1840  // Arguments have a vXi32 type so cast to vXi64.
1841  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1842  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1843
1844  if (IsSigned) {
1845    // Shift left then arithmetic shift right.
1846    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1847    LHS = Builder.CreateShl(LHS, ShiftAmt);
1848    LHS = Builder.CreateAShr(LHS, ShiftAmt);
1849    RHS = Builder.CreateShl(RHS, ShiftAmt);
1850    RHS = Builder.CreateAShr(RHS, ShiftAmt);
1851  } else {
1852    // Clear the upper bits.
1853    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1854    LHS = Builder.CreateAnd(LHS, Mask);
1855    RHS = Builder.CreateAnd(RHS, Mask);
1856  }
1857
1858  Value *Res = Builder.CreateMul(LHS, RHS);
1859
1860  if (CI.arg_size() == 4)
1861    Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1862
1863  return Res;
1864}
1865
1866// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1867static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1868                                     Value *Mask) {
1869  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1870  if (Mask) {
1871    const auto *C = dyn_cast<Constant>(Mask);
1872    if (!C || !C->isAllOnesValue())
1873      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1874  }
1875
1876  if (NumElts < 8) {
1877    int Indices[8];
1878    for (unsigned i = 0; i != NumElts; ++i)
1879      Indices[i] = i;
1880    for (unsigned i = NumElts; i != 8; ++i)
1881      Indices[i] = NumElts + i % NumElts;
1882    Vec = Builder.CreateShuffleVector(Vec,
1883                                      Constant::getNullValue(Vec->getType()),
1884                                      Indices);
1885  }
1886  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1887}
1888
1889static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1890                                   unsigned CC, bool Signed) {
1891  Value *Op0 = CI.getArgOperand(0);
1892  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1893
1894  Value *Cmp;
1895  if (CC == 3) {
1896    Cmp = Constant::getNullValue(
1897        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1898  } else if (CC == 7) {
1899    Cmp = Constant::getAllOnesValue(
1900        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1901  } else {
1902    ICmpInst::Predicate Pred;
1903    switch (CC) {
1904    default: llvm_unreachable("Unknown condition code");
1905    case 0: Pred = ICmpInst::ICMP_EQ;  break;
1906    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1907    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1908    case 4: Pred = ICmpInst::ICMP_NE;  break;
1909    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1910    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1911    }
1912    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1913  }
1914
1915  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1916
1917  return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1918}
1919
1920// Replace a masked intrinsic with an older unmasked intrinsic.
1921static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1922                                    Intrinsic::ID IID) {
1923  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1924  Value *Rep = Builder.CreateCall(Intrin,
1925                                 { CI.getArgOperand(0), CI.getArgOperand(1) });
1926  return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1927}
1928
1929static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1930  Value* A = CI.getArgOperand(0);
1931  Value* B = CI.getArgOperand(1);
1932  Value* Src = CI.getArgOperand(2);
1933  Value* Mask = CI.getArgOperand(3);
1934
1935  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1936  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1937  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1938  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1939  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1940  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1941}
1942
1943static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1944  Value* Op = CI.getArgOperand(0);
1945  Type* ReturnOp = CI.getType();
1946  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1947  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1948  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1949}
1950
1951// Replace intrinsic with unmasked version and a select.
1952static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1953                                      CallBase &CI, Value *&Rep) {
1954  Name = Name.substr(12); // Remove avx512.mask.
1955
1956  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1957  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1958  Intrinsic::ID IID;
1959  if (Name.starts_with("max.p")) {
1960    if (VecWidth == 128 && EltWidth == 32)
1961      IID = Intrinsic::x86_sse_max_ps;
1962    else if (VecWidth == 128 && EltWidth == 64)
1963      IID = Intrinsic::x86_sse2_max_pd;
1964    else if (VecWidth == 256 && EltWidth == 32)
1965      IID = Intrinsic::x86_avx_max_ps_256;
1966    else if (VecWidth == 256 && EltWidth == 64)
1967      IID = Intrinsic::x86_avx_max_pd_256;
1968    else
1969      llvm_unreachable("Unexpected intrinsic");
1970  } else if (Name.starts_with("min.p")) {
1971    if (VecWidth == 128 && EltWidth == 32)
1972      IID = Intrinsic::x86_sse_min_ps;
1973    else if (VecWidth == 128 && EltWidth == 64)
1974      IID = Intrinsic::x86_sse2_min_pd;
1975    else if (VecWidth == 256 && EltWidth == 32)
1976      IID = Intrinsic::x86_avx_min_ps_256;
1977    else if (VecWidth == 256 && EltWidth == 64)
1978      IID = Intrinsic::x86_avx_min_pd_256;
1979    else
1980      llvm_unreachable("Unexpected intrinsic");
1981  } else if (Name.starts_with("pshuf.b.")) {
1982    if (VecWidth == 128)
1983      IID = Intrinsic::x86_ssse3_pshuf_b_128;
1984    else if (VecWidth == 256)
1985      IID = Intrinsic::x86_avx2_pshuf_b;
1986    else if (VecWidth == 512)
1987      IID = Intrinsic::x86_avx512_pshuf_b_512;
1988    else
1989      llvm_unreachable("Unexpected intrinsic");
1990  } else if (Name.starts_with("pmul.hr.sw.")) {
1991    if (VecWidth == 128)
1992      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1993    else if (VecWidth == 256)
1994      IID = Intrinsic::x86_avx2_pmul_hr_sw;
1995    else if (VecWidth == 512)
1996      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1997    else
1998      llvm_unreachable("Unexpected intrinsic");
1999  } else if (Name.starts_with("pmulh.w.")) {
2000    if (VecWidth == 128)
2001      IID = Intrinsic::x86_sse2_pmulh_w;
2002    else if (VecWidth == 256)
2003      IID = Intrinsic::x86_avx2_pmulh_w;
2004    else if (VecWidth == 512)
2005      IID = Intrinsic::x86_avx512_pmulh_w_512;
2006    else
2007      llvm_unreachable("Unexpected intrinsic");
2008  } else if (Name.starts_with("pmulhu.w.")) {
2009    if (VecWidth == 128)
2010      IID = Intrinsic::x86_sse2_pmulhu_w;
2011    else if (VecWidth == 256)
2012      IID = Intrinsic::x86_avx2_pmulhu_w;
2013    else if (VecWidth == 512)
2014      IID = Intrinsic::x86_avx512_pmulhu_w_512;
2015    else
2016      llvm_unreachable("Unexpected intrinsic");
2017  } else if (Name.starts_with("pmaddw.d.")) {
2018    if (VecWidth == 128)
2019      IID = Intrinsic::x86_sse2_pmadd_wd;
2020    else if (VecWidth == 256)
2021      IID = Intrinsic::x86_avx2_pmadd_wd;
2022    else if (VecWidth == 512)
2023      IID = Intrinsic::x86_avx512_pmaddw_d_512;
2024    else
2025      llvm_unreachable("Unexpected intrinsic");
2026  } else if (Name.starts_with("pmaddubs.w.")) {
2027    if (VecWidth == 128)
2028      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2029    else if (VecWidth == 256)
2030      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2031    else if (VecWidth == 512)
2032      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2033    else
2034      llvm_unreachable("Unexpected intrinsic");
2035  } else if (Name.starts_with("packsswb.")) {
2036    if (VecWidth == 128)
2037      IID = Intrinsic::x86_sse2_packsswb_128;
2038    else if (VecWidth == 256)
2039      IID = Intrinsic::x86_avx2_packsswb;
2040    else if (VecWidth == 512)
2041      IID = Intrinsic::x86_avx512_packsswb_512;
2042    else
2043      llvm_unreachable("Unexpected intrinsic");
2044  } else if (Name.starts_with("packssdw.")) {
2045    if (VecWidth == 128)
2046      IID = Intrinsic::x86_sse2_packssdw_128;
2047    else if (VecWidth == 256)
2048      IID = Intrinsic::x86_avx2_packssdw;
2049    else if (VecWidth == 512)
2050      IID = Intrinsic::x86_avx512_packssdw_512;
2051    else
2052      llvm_unreachable("Unexpected intrinsic");
2053  } else if (Name.starts_with("packuswb.")) {
2054    if (VecWidth == 128)
2055      IID = Intrinsic::x86_sse2_packuswb_128;
2056    else if (VecWidth == 256)
2057      IID = Intrinsic::x86_avx2_packuswb;
2058    else if (VecWidth == 512)
2059      IID = Intrinsic::x86_avx512_packuswb_512;
2060    else
2061      llvm_unreachable("Unexpected intrinsic");
2062  } else if (Name.starts_with("packusdw.")) {
2063    if (VecWidth == 128)
2064      IID = Intrinsic::x86_sse41_packusdw;
2065    else if (VecWidth == 256)
2066      IID = Intrinsic::x86_avx2_packusdw;
2067    else if (VecWidth == 512)
2068      IID = Intrinsic::x86_avx512_packusdw_512;
2069    else
2070      llvm_unreachable("Unexpected intrinsic");
2071  } else if (Name.starts_with("vpermilvar.")) {
2072    if (VecWidth == 128 && EltWidth == 32)
2073      IID = Intrinsic::x86_avx_vpermilvar_ps;
2074    else if (VecWidth == 128 && EltWidth == 64)
2075      IID = Intrinsic::x86_avx_vpermilvar_pd;
2076    else if (VecWidth == 256 && EltWidth == 32)
2077      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2078    else if (VecWidth == 256 && EltWidth == 64)
2079      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2080    else if (VecWidth == 512 && EltWidth == 32)
2081      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2082    else if (VecWidth == 512 && EltWidth == 64)
2083      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2084    else
2085      llvm_unreachable("Unexpected intrinsic");
2086  } else if (Name == "cvtpd2dq.256") {
2087    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2088  } else if (Name == "cvtpd2ps.256") {
2089    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2090  } else if (Name == "cvttpd2dq.256") {
2091    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2092  } else if (Name == "cvttps2dq.128") {
2093    IID = Intrinsic::x86_sse2_cvttps2dq;
2094  } else if (Name == "cvttps2dq.256") {
2095    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2096  } else if (Name.starts_with("permvar.")) {
2097    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2098    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2099      IID = Intrinsic::x86_avx2_permps;
2100    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2101      IID = Intrinsic::x86_avx2_permd;
2102    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2103      IID = Intrinsic::x86_avx512_permvar_df_256;
2104    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2105      IID = Intrinsic::x86_avx512_permvar_di_256;
2106    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2107      IID = Intrinsic::x86_avx512_permvar_sf_512;
2108    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2109      IID = Intrinsic::x86_avx512_permvar_si_512;
2110    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2111      IID = Intrinsic::x86_avx512_permvar_df_512;
2112    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2113      IID = Intrinsic::x86_avx512_permvar_di_512;
2114    else if (VecWidth == 128 && EltWidth == 16)
2115      IID = Intrinsic::x86_avx512_permvar_hi_128;
2116    else if (VecWidth == 256 && EltWidth == 16)
2117      IID = Intrinsic::x86_avx512_permvar_hi_256;
2118    else if (VecWidth == 512 && EltWidth == 16)
2119      IID = Intrinsic::x86_avx512_permvar_hi_512;
2120    else if (VecWidth == 128 && EltWidth == 8)
2121      IID = Intrinsic::x86_avx512_permvar_qi_128;
2122    else if (VecWidth == 256 && EltWidth == 8)
2123      IID = Intrinsic::x86_avx512_permvar_qi_256;
2124    else if (VecWidth == 512 && EltWidth == 8)
2125      IID = Intrinsic::x86_avx512_permvar_qi_512;
2126    else
2127      llvm_unreachable("Unexpected intrinsic");
2128  } else if (Name.starts_with("dbpsadbw.")) {
2129    if (VecWidth == 128)
2130      IID = Intrinsic::x86_avx512_dbpsadbw_128;
2131    else if (VecWidth == 256)
2132      IID = Intrinsic::x86_avx512_dbpsadbw_256;
2133    else if (VecWidth == 512)
2134      IID = Intrinsic::x86_avx512_dbpsadbw_512;
2135    else
2136      llvm_unreachable("Unexpected intrinsic");
2137  } else if (Name.starts_with("pmultishift.qb.")) {
2138    if (VecWidth == 128)
2139      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2140    else if (VecWidth == 256)
2141      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2142    else if (VecWidth == 512)
2143      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2144    else
2145      llvm_unreachable("Unexpected intrinsic");
2146  } else if (Name.starts_with("conflict.")) {
2147    if (Name[9] == 'd' && VecWidth == 128)
2148      IID = Intrinsic::x86_avx512_conflict_d_128;
2149    else if (Name[9] == 'd' && VecWidth == 256)
2150      IID = Intrinsic::x86_avx512_conflict_d_256;
2151    else if (Name[9] == 'd' && VecWidth == 512)
2152      IID = Intrinsic::x86_avx512_conflict_d_512;
2153    else if (Name[9] == 'q' && VecWidth == 128)
2154      IID = Intrinsic::x86_avx512_conflict_q_128;
2155    else if (Name[9] == 'q' && VecWidth == 256)
2156      IID = Intrinsic::x86_avx512_conflict_q_256;
2157    else if (Name[9] == 'q' && VecWidth == 512)
2158      IID = Intrinsic::x86_avx512_conflict_q_512;
2159    else
2160      llvm_unreachable("Unexpected intrinsic");
2161  } else if (Name.starts_with("pavg.")) {
2162    if (Name[5] == 'b' && VecWidth == 128)
2163      IID = Intrinsic::x86_sse2_pavg_b;
2164    else if (Name[5] == 'b' && VecWidth == 256)
2165      IID = Intrinsic::x86_avx2_pavg_b;
2166    else if (Name[5] == 'b' && VecWidth == 512)
2167      IID = Intrinsic::x86_avx512_pavg_b_512;
2168    else if (Name[5] == 'w' && VecWidth == 128)
2169      IID = Intrinsic::x86_sse2_pavg_w;
2170    else if (Name[5] == 'w' && VecWidth == 256)
2171      IID = Intrinsic::x86_avx2_pavg_w;
2172    else if (Name[5] == 'w' && VecWidth == 512)
2173      IID = Intrinsic::x86_avx512_pavg_w_512;
2174    else
2175      llvm_unreachable("Unexpected intrinsic");
2176  } else
2177    return false;
2178
2179  SmallVector<Value *, 4> Args(CI.args());
2180  Args.pop_back();
2181  Args.pop_back();
2182  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2183                           Args);
2184  unsigned NumArgs = CI.arg_size();
2185  Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2186                      CI.getArgOperand(NumArgs - 2));
2187  return true;
2188}
2189
2190/// Upgrade comment in call to inline asm that represents an objc retain release
2191/// marker.
2192void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2193  size_t Pos;
2194  if (AsmStr->find("mov\tfp") == 0 &&
2195      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2196      (Pos = AsmStr->find("# marker")) != std::string::npos) {
2197    AsmStr->replace(Pos, 1, ";");
2198  }
2199}
2200
2201static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2202                                      IRBuilder<> &Builder) {
2203  if (Name == "mve.vctp64.old") {
2204    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
2205    // correct type.
2206    Value *VCTP = Builder.CreateCall(
2207        Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
2208        CI->getArgOperand(0), CI->getName());
2209    Value *C1 = Builder.CreateCall(
2210        Intrinsic::getDeclaration(
2211            F->getParent(), Intrinsic::arm_mve_pred_v2i,
2212            {VectorType::get(Builder.getInt1Ty(), 2, false)}),
2213        VCTP);
2214    return Builder.CreateCall(
2215        Intrinsic::getDeclaration(
2216            F->getParent(), Intrinsic::arm_mve_pred_i2v,
2217            {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2218        C1);
2219  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
2220             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
2221             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
2222             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
2223             Name ==
2224                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
2225             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
2226             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
2227             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
2228             Name ==
2229                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
2230             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
2231             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
2232             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
2233             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
2234             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
2235             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
2236             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
2237    std::vector<Type *> Tys;
2238    unsigned ID = CI->getIntrinsicID();
2239    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
2240    switch (ID) {
2241    case Intrinsic::arm_mve_mull_int_predicated:
2242    case Intrinsic::arm_mve_vqdmull_predicated:
2243    case Intrinsic::arm_mve_vldr_gather_base_predicated:
2244      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
2245      break;
2246    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
2247    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
2248    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
2249      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
2250             V2I1Ty};
2251      break;
2252    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
2253      Tys = {CI->getType(), CI->getOperand(0)->getType(),
2254             CI->getOperand(1)->getType(), V2I1Ty};
2255      break;
2256    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
2257      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
2258             CI->getOperand(2)->getType(), V2I1Ty};
2259      break;
2260    case Intrinsic::arm_cde_vcx1q_predicated:
2261    case Intrinsic::arm_cde_vcx1qa_predicated:
2262    case Intrinsic::arm_cde_vcx2q_predicated:
2263    case Intrinsic::arm_cde_vcx2qa_predicated:
2264    case Intrinsic::arm_cde_vcx3q_predicated:
2265    case Intrinsic::arm_cde_vcx3qa_predicated:
2266      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
2267      break;
2268    default:
2269      llvm_unreachable("Unhandled Intrinsic!");
2270    }
2271
2272    std::vector<Value *> Ops;
2273    for (Value *Op : CI->args()) {
2274      Type *Ty = Op->getType();
2275      if (Ty->getScalarSizeInBits() == 1) {
2276        Value *C1 = Builder.CreateCall(
2277            Intrinsic::getDeclaration(
2278                F->getParent(), Intrinsic::arm_mve_pred_v2i,
2279                {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2280            Op);
2281        Op = Builder.CreateCall(
2282            Intrinsic::getDeclaration(F->getParent(),
2283                                      Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
2284            C1);
2285      }
2286      Ops.push_back(Op);
2287    }
2288
2289    Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
2290    return Builder.CreateCall(Fn, Ops, CI->getName());
2291  }
2292  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2293}
2294
2295static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
2296                                         Function *F, IRBuilder<> &Builder) {
2297  const bool IsInc = Name.starts_with("atomic.inc.");
2298  if (IsInc || Name.starts_with("atomic.dec.")) {
2299    if (CI->getNumOperands() != 6) // Malformed bitcode.
2300      return nullptr;
2301
2302    AtomicRMWInst::BinOp RMWOp =
2303        IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2304
2305    Value *Ptr = CI->getArgOperand(0);
2306    Value *Val = CI->getArgOperand(1);
2307    ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2308    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
2309
2310    AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2311    if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2312      Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2313    if (Order == AtomicOrdering::NotAtomic ||
2314        Order == AtomicOrdering::Unordered)
2315      Order = AtomicOrdering::SequentiallyConsistent;
2316
2317    // The scope argument never really worked correctly. Use agent as the most
2318    // conservative option which should still always produce the instruction.
2319    SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
2320    AtomicRMWInst *RMW =
2321        Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2322
2323    if (!VolatileArg || !VolatileArg->isZero())
2324      RMW->setVolatile(true);
2325    return RMW;
2326  }
2327
2328  llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2329}
2330
2331/// Upgrade a call to an old intrinsic. All argument and return casting must be
2332/// provided to seamlessly integrate with existing context.
2333void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
2334  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
2335  // checks the callee's function type matches. It's likely we need to handle
2336  // type changes here.
2337  Function *F = dyn_cast<Function>(CI->getCalledOperand());
2338  if (!F)
2339    return;
2340
2341  LLVMContext &C = CI->getContext();
2342  IRBuilder<> Builder(C);
2343  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
2344
2345  if (!NewFn) {
2346    // Get the Function's name.
2347    StringRef Name = F->getName();
2348
2349    assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2350    Name = Name.substr(5);
2351
2352    bool IsX86 = Name.consume_front("x86.");
2353    bool IsNVVM = Name.consume_front("nvvm.");
2354    bool IsARM = Name.consume_front("arm.");
2355    bool IsAMDGCN = Name.consume_front("amdgcn.");
2356
2357    if (IsX86 && Name.starts_with("sse4a.movnt.")) {
2358      SmallVector<Metadata *, 1> Elts;
2359      Elts.push_back(
2360          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2361      MDNode *Node = MDNode::get(C, Elts);
2362
2363      Value *Arg0 = CI->getArgOperand(0);
2364      Value *Arg1 = CI->getArgOperand(1);
2365
2366      // Nontemporal (unaligned) store of the 0'th element of the float/double
2367      // vector.
2368      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2369      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2370      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2371      Value *Extract =
2372          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2373
2374      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2375      SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2376
2377      // Remove intrinsic.
2378      CI->eraseFromParent();
2379      return;
2380    }
2381
2382    if (IsX86 && (Name.starts_with("avx.movnt.") ||
2383                  Name.starts_with("avx512.storent."))) {
2384      SmallVector<Metadata *, 1> Elts;
2385      Elts.push_back(
2386          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2387      MDNode *Node = MDNode::get(C, Elts);
2388
2389      Value *Arg0 = CI->getArgOperand(0);
2390      Value *Arg1 = CI->getArgOperand(1);
2391
2392      // Convert the type of the pointer to a pointer to the stored type.
2393      Value *BC = Builder.CreateBitCast(Arg0,
2394                                        PointerType::getUnqual(Arg1->getType()),
2395                                        "cast");
2396      StoreInst *SI = Builder.CreateAlignedStore(
2397          Arg1, BC,
2398          Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2399      SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2400
2401      // Remove intrinsic.
2402      CI->eraseFromParent();
2403      return;
2404    }
2405
2406    if (IsX86 && Name == "sse2.storel.dq") {
2407      Value *Arg0 = CI->getArgOperand(0);
2408      Value *Arg1 = CI->getArgOperand(1);
2409
2410      auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2411      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2412      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2413      Value *BC = Builder.CreateBitCast(Arg0,
2414                                        PointerType::getUnqual(Elt->getType()),
2415                                        "cast");
2416      Builder.CreateAlignedStore(Elt, BC, Align(1));
2417
2418      // Remove intrinsic.
2419      CI->eraseFromParent();
2420      return;
2421    }
2422
2423    if (IsX86 && (Name.starts_with("sse.storeu.") ||
2424                  Name.starts_with("sse2.storeu.") ||
2425                  Name.starts_with("avx.storeu."))) {
2426      Value *Arg0 = CI->getArgOperand(0);
2427      Value *Arg1 = CI->getArgOperand(1);
2428
2429      Arg0 = Builder.CreateBitCast(Arg0,
2430                                   PointerType::getUnqual(Arg1->getType()),
2431                                   "cast");
2432      Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2433
2434      // Remove intrinsic.
2435      CI->eraseFromParent();
2436      return;
2437    }
2438
2439    if (IsX86 && Name == "avx512.mask.store.ss") {
2440      Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2441      upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2442                         Mask, false);
2443
2444      // Remove intrinsic.
2445      CI->eraseFromParent();
2446      return;
2447    }
2448
2449    if (IsX86 && Name.starts_with("avx512.mask.store")) {
2450      // "avx512.mask.storeu." or "avx512.mask.store."
2451      bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2452      upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2453                         CI->getArgOperand(2), Aligned);
2454
2455      // Remove intrinsic.
2456      CI->eraseFromParent();
2457      return;
2458    }
2459
2460    Value *Rep;
2461    // Upgrade packed integer vector compare intrinsics to compare instructions.
2462    if (IsX86 && (Name.starts_with("sse2.pcmp") ||
2463                  Name.starts_with("avx2.pcmp"))) {
2464      // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2465      bool CmpEq = Name[9] == 'e';
2466      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2467                               CI->getArgOperand(0), CI->getArgOperand(1));
2468      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2469    } else if (IsX86 && (Name.starts_with("avx512.broadcastm"))) {
2470      Type *ExtTy = Type::getInt32Ty(C);
2471      if (CI->getOperand(0)->getType()->isIntegerTy(8))
2472        ExtTy = Type::getInt64Ty(C);
2473      unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2474                         ExtTy->getPrimitiveSizeInBits();
2475      Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2476      Rep = Builder.CreateVectorSplat(NumElts, Rep);
2477    } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2478                         Name == "sse2.sqrt.sd")) {
2479      Value *Vec = CI->getArgOperand(0);
2480      Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2481      Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2482                                                 Intrinsic::sqrt, Elt0->getType());
2483      Elt0 = Builder.CreateCall(Intr, Elt0);
2484      Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2485    } else if (IsX86 && (Name.starts_with("avx.sqrt.p") ||
2486                         Name.starts_with("sse2.sqrt.p") ||
2487                         Name.starts_with("sse.sqrt.p"))) {
2488      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2489                                                         Intrinsic::sqrt,
2490                                                         CI->getType()),
2491                               {CI->getArgOperand(0)});
2492    } else if (IsX86 && (Name.starts_with("avx512.mask.sqrt.p"))) {
2493      if (CI->arg_size() == 4 &&
2494          (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2495           cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2496        Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2497                                            : Intrinsic::x86_avx512_sqrt_pd_512;
2498
2499        Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2500        Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2501                                                           IID), Args);
2502      } else {
2503        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2504                                                           Intrinsic::sqrt,
2505                                                           CI->getType()),
2506                                 {CI->getArgOperand(0)});
2507      }
2508      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2509                          CI->getArgOperand(1));
2510    } else if (IsX86 && (Name.starts_with("avx512.ptestm") ||
2511                         Name.starts_with("avx512.ptestnm"))) {
2512      Value *Op0 = CI->getArgOperand(0);
2513      Value *Op1 = CI->getArgOperand(1);
2514      Value *Mask = CI->getArgOperand(2);
2515      Rep = Builder.CreateAnd(Op0, Op1);
2516      llvm::Type *Ty = Op0->getType();
2517      Value *Zero = llvm::Constant::getNullValue(Ty);
2518      ICmpInst::Predicate Pred =
2519        Name.starts_with("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2520      Rep = Builder.CreateICmp(Pred, Rep, Zero);
2521      Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2522    } else if (IsX86 && (Name.starts_with("avx512.mask.pbroadcast"))){
2523      unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2524                             ->getNumElements();
2525      Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2526      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2527                          CI->getArgOperand(1));
2528    } else if (IsX86 && (Name.starts_with("avx512.kunpck"))) {
2529      unsigned NumElts = CI->getType()->getScalarSizeInBits();
2530      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2531      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2532      int Indices[64];
2533      for (unsigned i = 0; i != NumElts; ++i)
2534        Indices[i] = i;
2535
2536      // First extract half of each vector. This gives better codegen than
2537      // doing it in a single shuffle.
2538      LHS =
2539          Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2540      RHS =
2541          Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2542      // Concat the vectors.
2543      // NOTE: Operands have to be swapped to match intrinsic definition.
2544      Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2545      Rep = Builder.CreateBitCast(Rep, CI->getType());
2546    } else if (IsX86 && Name == "avx512.kand.w") {
2547      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2548      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2549      Rep = Builder.CreateAnd(LHS, RHS);
2550      Rep = Builder.CreateBitCast(Rep, CI->getType());
2551    } else if (IsX86 && Name == "avx512.kandn.w") {
2552      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2553      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2554      LHS = Builder.CreateNot(LHS);
2555      Rep = Builder.CreateAnd(LHS, RHS);
2556      Rep = Builder.CreateBitCast(Rep, CI->getType());
2557    } else if (IsX86 && Name == "avx512.kor.w") {
2558      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2559      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2560      Rep = Builder.CreateOr(LHS, RHS);
2561      Rep = Builder.CreateBitCast(Rep, CI->getType());
2562    } else if (IsX86 && Name == "avx512.kxor.w") {
2563      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2564      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2565      Rep = Builder.CreateXor(LHS, RHS);
2566      Rep = Builder.CreateBitCast(Rep, CI->getType());
2567    } else if (IsX86 && Name == "avx512.kxnor.w") {
2568      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2569      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2570      LHS = Builder.CreateNot(LHS);
2571      Rep = Builder.CreateXor(LHS, RHS);
2572      Rep = Builder.CreateBitCast(Rep, CI->getType());
2573    } else if (IsX86 && Name == "avx512.knot.w") {
2574      Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2575      Rep = Builder.CreateNot(Rep);
2576      Rep = Builder.CreateBitCast(Rep, CI->getType());
2577    } else if (IsX86 &&
2578               (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2579      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2580      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2581      Rep = Builder.CreateOr(LHS, RHS);
2582      Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2583      Value *C;
2584      if (Name[14] == 'c')
2585        C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2586      else
2587        C = ConstantInt::getNullValue(Builder.getInt16Ty());
2588      Rep = Builder.CreateICmpEQ(Rep, C);
2589      Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2590    } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2591                         Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2592                         Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2593                         Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2594      Type *I32Ty = Type::getInt32Ty(C);
2595      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2596                                                 ConstantInt::get(I32Ty, 0));
2597      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2598                                                 ConstantInt::get(I32Ty, 0));
2599      Value *EltOp;
2600      if (Name.contains(".add."))
2601        EltOp = Builder.CreateFAdd(Elt0, Elt1);
2602      else if (Name.contains(".sub."))
2603        EltOp = Builder.CreateFSub(Elt0, Elt1);
2604      else if (Name.contains(".mul."))
2605        EltOp = Builder.CreateFMul(Elt0, Elt1);
2606      else
2607        EltOp = Builder.CreateFDiv(Elt0, Elt1);
2608      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2609                                        ConstantInt::get(I32Ty, 0));
2610    } else if (IsX86 && Name.starts_with("avx512.mask.pcmp")) {
2611      // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2612      bool CmpEq = Name[16] == 'e';
2613      Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2614    } else if (IsX86 && Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2615      Type *OpTy = CI->getArgOperand(0)->getType();
2616      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2617      Intrinsic::ID IID;
2618      switch (VecWidth) {
2619      default: llvm_unreachable("Unexpected intrinsic");
2620      case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2621      case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2622      case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2623      }
2624
2625      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2626                               { CI->getOperand(0), CI->getArgOperand(1) });
2627      Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2628    } else if (IsX86 && Name.starts_with("avx512.mask.fpclass.p")) {
2629      Type *OpTy = CI->getArgOperand(0)->getType();
2630      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2631      unsigned EltWidth = OpTy->getScalarSizeInBits();
2632      Intrinsic::ID IID;
2633      if (VecWidth == 128 && EltWidth == 32)
2634        IID = Intrinsic::x86_avx512_fpclass_ps_128;
2635      else if (VecWidth == 256 && EltWidth == 32)
2636        IID = Intrinsic::x86_avx512_fpclass_ps_256;
2637      else if (VecWidth == 512 && EltWidth == 32)
2638        IID = Intrinsic::x86_avx512_fpclass_ps_512;
2639      else if (VecWidth == 128 && EltWidth == 64)
2640        IID = Intrinsic::x86_avx512_fpclass_pd_128;
2641      else if (VecWidth == 256 && EltWidth == 64)
2642        IID = Intrinsic::x86_avx512_fpclass_pd_256;
2643      else if (VecWidth == 512 && EltWidth == 64)
2644        IID = Intrinsic::x86_avx512_fpclass_pd_512;
2645      else
2646        llvm_unreachable("Unexpected intrinsic");
2647
2648      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2649                               { CI->getOperand(0), CI->getArgOperand(1) });
2650      Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2651    } else if (IsX86 && Name.starts_with("avx512.cmp.p")) {
2652      SmallVector<Value *, 4> Args(CI->args());
2653      Type *OpTy = Args[0]->getType();
2654      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2655      unsigned EltWidth = OpTy->getScalarSizeInBits();
2656      Intrinsic::ID IID;
2657      if (VecWidth == 128 && EltWidth == 32)
2658        IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2659      else if (VecWidth == 256 && EltWidth == 32)
2660        IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2661      else if (VecWidth == 512 && EltWidth == 32)
2662        IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2663      else if (VecWidth == 128 && EltWidth == 64)
2664        IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2665      else if (VecWidth == 256 && EltWidth == 64)
2666        IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2667      else if (VecWidth == 512 && EltWidth == 64)
2668        IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2669      else
2670        llvm_unreachable("Unexpected intrinsic");
2671
2672      Value *Mask = Constant::getAllOnesValue(CI->getType());
2673      if (VecWidth == 512)
2674        std::swap(Mask, Args.back());
2675      Args.push_back(Mask);
2676
2677      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2678                               Args);
2679    } else if (IsX86 && Name.starts_with("avx512.mask.cmp.")) {
2680      // Integer compare intrinsics.
2681      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2682      Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2683    } else if (IsX86 && Name.starts_with("avx512.mask.ucmp.")) {
2684      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2685      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2686    } else if (IsX86 && (Name.starts_with("avx512.cvtb2mask.") ||
2687                         Name.starts_with("avx512.cvtw2mask.") ||
2688                         Name.starts_with("avx512.cvtd2mask.") ||
2689                         Name.starts_with("avx512.cvtq2mask."))) {
2690      Value *Op = CI->getArgOperand(0);
2691      Value *Zero = llvm::Constant::getNullValue(Op->getType());
2692      Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2693      Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2694    } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2695                        Name == "ssse3.pabs.w.128" ||
2696                        Name == "ssse3.pabs.d.128" ||
2697                        Name.starts_with("avx2.pabs") ||
2698                        Name.starts_with("avx512.mask.pabs"))) {
2699      Rep = upgradeAbs(Builder, *CI);
2700    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2701                         Name == "sse2.pmaxs.w" ||
2702                         Name == "sse41.pmaxsd" ||
2703                         Name.starts_with("avx2.pmaxs") ||
2704                         Name.starts_with("avx512.mask.pmaxs"))) {
2705      Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2706    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2707                         Name == "sse41.pmaxuw" ||
2708                         Name == "sse41.pmaxud" ||
2709                         Name.starts_with("avx2.pmaxu") ||
2710                         Name.starts_with("avx512.mask.pmaxu"))) {
2711      Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2712    } else if (IsX86 && (Name == "sse41.pminsb" ||
2713                         Name == "sse2.pmins.w" ||
2714                         Name == "sse41.pminsd" ||
2715                         Name.starts_with("avx2.pmins") ||
2716                         Name.starts_with("avx512.mask.pmins"))) {
2717      Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2718    } else if (IsX86 && (Name == "sse2.pminu.b" ||
2719                         Name == "sse41.pminuw" ||
2720                         Name == "sse41.pminud" ||
2721                         Name.starts_with("avx2.pminu") ||
2722                         Name.starts_with("avx512.mask.pminu"))) {
2723      Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2724    } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2725                         Name == "avx2.pmulu.dq" ||
2726                         Name == "avx512.pmulu.dq.512" ||
2727                         Name.starts_with("avx512.mask.pmulu.dq."))) {
2728      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2729    } else if (IsX86 && (Name == "sse41.pmuldq" ||
2730                         Name == "avx2.pmul.dq" ||
2731                         Name == "avx512.pmul.dq.512" ||
2732                         Name.starts_with("avx512.mask.pmul.dq."))) {
2733      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2734    } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2735                         Name == "sse2.cvtsi2sd" ||
2736                         Name == "sse.cvtsi642ss" ||
2737                         Name == "sse2.cvtsi642sd")) {
2738      Rep = Builder.CreateSIToFP(
2739          CI->getArgOperand(1),
2740          cast<VectorType>(CI->getType())->getElementType());
2741      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2742    } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2743      Rep = Builder.CreateUIToFP(
2744          CI->getArgOperand(1),
2745          cast<VectorType>(CI->getType())->getElementType());
2746      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2747    } else if (IsX86 && Name == "sse2.cvtss2sd") {
2748      Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2749      Rep = Builder.CreateFPExt(
2750          Rep, cast<VectorType>(CI->getType())->getElementType());
2751      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2752    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2753                         Name == "sse2.cvtdq2ps" ||
2754                         Name == "avx.cvtdq2.pd.256" ||
2755                         Name == "avx.cvtdq2.ps.256" ||
2756                         Name.starts_with("avx512.mask.cvtdq2pd.") ||
2757                         Name.starts_with("avx512.mask.cvtudq2pd.") ||
2758                         Name.starts_with("avx512.mask.cvtdq2ps.") ||
2759                         Name.starts_with("avx512.mask.cvtudq2ps.") ||
2760                         Name.starts_with("avx512.mask.cvtqq2pd.") ||
2761                         Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2762                         Name == "avx512.mask.cvtqq2ps.256" ||
2763                         Name == "avx512.mask.cvtqq2ps.512" ||
2764                         Name == "avx512.mask.cvtuqq2ps.256" ||
2765                         Name == "avx512.mask.cvtuqq2ps.512" ||
2766                         Name == "sse2.cvtps2pd" ||
2767                         Name == "avx.cvt.ps2.pd.256" ||
2768                         Name == "avx512.mask.cvtps2pd.128" ||
2769                         Name == "avx512.mask.cvtps2pd.256")) {
2770      auto *DstTy = cast<FixedVectorType>(CI->getType());
2771      Rep = CI->getArgOperand(0);
2772      auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2773
2774      unsigned NumDstElts = DstTy->getNumElements();
2775      if (NumDstElts < SrcTy->getNumElements()) {
2776        assert(NumDstElts == 2 && "Unexpected vector size");
2777        Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2778      }
2779
2780      bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2781      bool IsUnsigned = Name.contains("cvtu");
2782      if (IsPS2PD)
2783        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2784      else if (CI->arg_size() == 4 &&
2785               (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2786                cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2787        Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2788                                       : Intrinsic::x86_avx512_sitofp_round;
2789        Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2790                                                { DstTy, SrcTy });
2791        Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2792      } else {
2793        Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2794                         : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2795      }
2796
2797      if (CI->arg_size() >= 3)
2798        Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2799                            CI->getArgOperand(1));
2800    } else if (IsX86 && (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2801                         Name.starts_with("vcvtph2ps."))) {
2802      auto *DstTy = cast<FixedVectorType>(CI->getType());
2803      Rep = CI->getArgOperand(0);
2804      auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2805      unsigned NumDstElts = DstTy->getNumElements();
2806      if (NumDstElts != SrcTy->getNumElements()) {
2807        assert(NumDstElts == 4 && "Unexpected vector size");
2808        Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2809      }
2810      Rep = Builder.CreateBitCast(
2811          Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2812      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2813      if (CI->arg_size() >= 3)
2814        Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2815                            CI->getArgOperand(1));
2816    } else if (IsX86 && Name.starts_with("avx512.mask.load")) {
2817      // "avx512.mask.loadu." or "avx512.mask.load."
2818      bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2819      Rep =
2820          upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2821                            CI->getArgOperand(2), Aligned);
2822    } else if (IsX86 && Name.starts_with("avx512.mask.expand.load.")) {
2823      auto *ResultTy = cast<FixedVectorType>(CI->getType());
2824      Type *PtrTy = ResultTy->getElementType();
2825
2826      // Cast the pointer to element type.
2827      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2828                                         llvm::PointerType::getUnqual(PtrTy));
2829
2830      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2831                                     ResultTy->getNumElements());
2832
2833      Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2834                                                Intrinsic::masked_expandload,
2835                                                ResultTy);
2836      Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2837    } else if (IsX86 && Name.starts_with("avx512.mask.compress.store.")) {
2838      auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2839      Type *PtrTy = ResultTy->getElementType();
2840
2841      // Cast the pointer to element type.
2842      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2843                                         llvm::PointerType::getUnqual(PtrTy));
2844
2845      Value *MaskVec =
2846          getX86MaskVec(Builder, CI->getArgOperand(2),
2847                        cast<FixedVectorType>(ResultTy)->getNumElements());
2848
2849      Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2850                                                Intrinsic::masked_compressstore,
2851                                                ResultTy);
2852      Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2853    } else if (IsX86 && (Name.starts_with("avx512.mask.compress.") ||
2854                         Name.starts_with("avx512.mask.expand."))) {
2855      auto *ResultTy = cast<FixedVectorType>(CI->getType());
2856
2857      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2858                                     ResultTy->getNumElements());
2859
2860      bool IsCompress = Name[12] == 'c';
2861      Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2862                                     : Intrinsic::x86_avx512_mask_expand;
2863      Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2864      Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2865                                       MaskVec });
2866    } else if (IsX86 && Name.starts_with("xop.vpcom")) {
2867      bool IsSigned;
2868      if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2869          Name.ends_with("uq"))
2870        IsSigned = false;
2871      else if (Name.ends_with("b") || Name.ends_with("w") || Name.ends_with("d") ||
2872               Name.ends_with("q"))
2873        IsSigned = true;
2874      else
2875        llvm_unreachable("Unknown suffix");
2876
2877      unsigned Imm;
2878      if (CI->arg_size() == 3) {
2879        Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2880      } else {
2881        Name = Name.substr(9); // strip off "xop.vpcom"
2882        if (Name.starts_with("lt"))
2883          Imm = 0;
2884        else if (Name.starts_with("le"))
2885          Imm = 1;
2886        else if (Name.starts_with("gt"))
2887          Imm = 2;
2888        else if (Name.starts_with("ge"))
2889          Imm = 3;
2890        else if (Name.starts_with("eq"))
2891          Imm = 4;
2892        else if (Name.starts_with("ne"))
2893          Imm = 5;
2894        else if (Name.starts_with("false"))
2895          Imm = 6;
2896        else if (Name.starts_with("true"))
2897          Imm = 7;
2898        else
2899          llvm_unreachable("Unknown condition");
2900      }
2901
2902      Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2903    } else if (IsX86 && Name.starts_with("xop.vpcmov")) {
2904      Value *Sel = CI->getArgOperand(2);
2905      Value *NotSel = Builder.CreateNot(Sel);
2906      Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2907      Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2908      Rep = Builder.CreateOr(Sel0, Sel1);
2909    } else if (IsX86 && (Name.starts_with("xop.vprot") ||
2910                         Name.starts_with("avx512.prol") ||
2911                         Name.starts_with("avx512.mask.prol"))) {
2912      Rep = upgradeX86Rotate(Builder, *CI, false);
2913    } else if (IsX86 && (Name.starts_with("avx512.pror") ||
2914                         Name.starts_with("avx512.mask.pror"))) {
2915      Rep = upgradeX86Rotate(Builder, *CI, true);
2916    } else if (IsX86 && (Name.starts_with("avx512.vpshld.") ||
2917                         Name.starts_with("avx512.mask.vpshld") ||
2918                         Name.starts_with("avx512.maskz.vpshld"))) {
2919      bool ZeroMask = Name[11] == 'z';
2920      Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2921    } else if (IsX86 && (Name.starts_with("avx512.vpshrd.") ||
2922                         Name.starts_with("avx512.mask.vpshrd") ||
2923                         Name.starts_with("avx512.maskz.vpshrd"))) {
2924      bool ZeroMask = Name[11] == 'z';
2925      Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2926    } else if (IsX86 && Name == "sse42.crc32.64.8") {
2927      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2928                                               Intrinsic::x86_sse42_crc32_32_8);
2929      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2930      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2931      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2932    } else if (IsX86 && (Name.starts_with("avx.vbroadcast.s") ||
2933                         Name.starts_with("avx512.vbroadcast.s"))) {
2934      // Replace broadcasts with a series of insertelements.
2935      auto *VecTy = cast<FixedVectorType>(CI->getType());
2936      Type *EltTy = VecTy->getElementType();
2937      unsigned EltNum = VecTy->getNumElements();
2938      Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2939      Type *I32Ty = Type::getInt32Ty(C);
2940      Rep = PoisonValue::get(VecTy);
2941      for (unsigned I = 0; I < EltNum; ++I)
2942        Rep = Builder.CreateInsertElement(Rep, Load,
2943                                          ConstantInt::get(I32Ty, I));
2944    } else if (IsX86 && (Name.starts_with("sse41.pmovsx") ||
2945                         Name.starts_with("sse41.pmovzx") ||
2946                         Name.starts_with("avx2.pmovsx") ||
2947                         Name.starts_with("avx2.pmovzx") ||
2948                         Name.starts_with("avx512.mask.pmovsx") ||
2949                         Name.starts_with("avx512.mask.pmovzx"))) {
2950      auto *DstTy = cast<FixedVectorType>(CI->getType());
2951      unsigned NumDstElts = DstTy->getNumElements();
2952
2953      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2954      SmallVector<int, 8> ShuffleMask(NumDstElts);
2955      for (unsigned i = 0; i != NumDstElts; ++i)
2956        ShuffleMask[i] = i;
2957
2958      Value *SV =
2959          Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2960
2961      bool DoSext = Name.contains("pmovsx");
2962      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2963                   : Builder.CreateZExt(SV, DstTy);
2964      // If there are 3 arguments, it's a masked intrinsic so we need a select.
2965      if (CI->arg_size() == 3)
2966        Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2967                            CI->getArgOperand(1));
2968    } else if (Name == "avx512.mask.pmov.qd.256" ||
2969               Name == "avx512.mask.pmov.qd.512" ||
2970               Name == "avx512.mask.pmov.wb.256" ||
2971               Name == "avx512.mask.pmov.wb.512") {
2972      Type *Ty = CI->getArgOperand(1)->getType();
2973      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2974      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2975                          CI->getArgOperand(1));
2976    } else if (IsX86 && (Name.starts_with("avx.vbroadcastf128") ||
2977                         Name == "avx2.vbroadcasti128")) {
2978      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2979      Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2980      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2981      auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2982      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2983                                            PointerType::getUnqual(VT));
2984      Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2985      if (NumSrcElts == 2)
2986        Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2987      else
2988        Rep = Builder.CreateShuffleVector(
2989            Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2990    } else if (IsX86 && (Name.starts_with("avx512.mask.shuf.i") ||
2991                         Name.starts_with("avx512.mask.shuf.f"))) {
2992      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2993      Type *VT = CI->getType();
2994      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2995      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2996      unsigned ControlBitsMask = NumLanes - 1;
2997      unsigned NumControlBits = NumLanes / 2;
2998      SmallVector<int, 8> ShuffleMask(0);
2999
3000      for (unsigned l = 0; l != NumLanes; ++l) {
3001        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3002        // We actually need the other source.
3003        if (l >= NumLanes / 2)
3004          LaneMask += NumLanes;
3005        for (unsigned i = 0; i != NumElementsInLane; ++i)
3006          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3007      }
3008      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3009                                        CI->getArgOperand(1), ShuffleMask);
3010      Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3011                          CI->getArgOperand(3));
3012    }else if (IsX86 && (Name.starts_with("avx512.mask.broadcastf") ||
3013                         Name.starts_with("avx512.mask.broadcasti"))) {
3014      unsigned NumSrcElts =
3015          cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3016              ->getNumElements();
3017      unsigned NumDstElts =
3018          cast<FixedVectorType>(CI->getType())->getNumElements();
3019
3020      SmallVector<int, 8> ShuffleMask(NumDstElts);
3021      for (unsigned i = 0; i != NumDstElts; ++i)
3022        ShuffleMask[i] = i % NumSrcElts;
3023
3024      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3025                                        CI->getArgOperand(0),
3026                                        ShuffleMask);
3027      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3028                          CI->getArgOperand(1));
3029    } else if (IsX86 && (Name.starts_with("avx2.pbroadcast") ||
3030                         Name.starts_with("avx2.vbroadcast") ||
3031                         Name.starts_with("avx512.pbroadcast") ||
3032                         Name.starts_with("avx512.mask.broadcast.s"))) {
3033      // Replace vp?broadcasts with a vector shuffle.
3034      Value *Op = CI->getArgOperand(0);
3035      ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3036      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3037      SmallVector<int, 8> M;
3038      ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3039      Rep = Builder.CreateShuffleVector(Op, M);
3040
3041      if (CI->arg_size() == 3)
3042        Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3043                            CI->getArgOperand(1));
3044    } else if (IsX86 && (Name.starts_with("sse2.padds.") ||
3045                         Name.starts_with("avx2.padds.") ||
3046                         Name.starts_with("avx512.padds.") ||
3047                         Name.starts_with("avx512.mask.padds."))) {
3048      Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3049    } else if (IsX86 && (Name.starts_with("sse2.psubs.") ||
3050                         Name.starts_with("avx2.psubs.") ||
3051                         Name.starts_with("avx512.psubs.") ||
3052                         Name.starts_with("avx512.mask.psubs."))) {
3053      Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3054    } else if (IsX86 && (Name.starts_with("sse2.paddus.") ||
3055                         Name.starts_with("avx2.paddus.") ||
3056                         Name.starts_with("avx512.mask.paddus."))) {
3057      Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3058    } else if (IsX86 && (Name.starts_with("sse2.psubus.") ||
3059                         Name.starts_with("avx2.psubus.") ||
3060                         Name.starts_with("avx512.mask.psubus."))) {
3061      Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3062    } else if (IsX86 && Name.starts_with("avx512.mask.palignr.")) {
3063      Rep = upgradeX86ALIGNIntrinsics(
3064          Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3065          CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4),
3066          false);
3067    } else if (IsX86 && Name.starts_with("avx512.mask.valign.")) {
3068      Rep = upgradeX86ALIGNIntrinsics(
3069          Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3070          CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4),
3071          true);
3072    } else if (IsX86 && (Name == "sse2.psll.dq" ||
3073                         Name == "avx2.psll.dq")) {
3074      // 128/256-bit shift left specified in bits.
3075      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3076      Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3077                                       Shift / 8); // Shift is in bits.
3078    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
3079                         Name == "avx2.psrl.dq")) {
3080      // 128/256-bit shift right specified in bits.
3081      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3082      Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3083                                       Shift / 8); // Shift is in bits.
3084    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
3085                         Name == "avx2.psll.dq.bs" ||
3086                         Name == "avx512.psll.dq.512")) {
3087      // 128/256/512-bit shift left specified in bytes.
3088      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3089      Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3090    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
3091                         Name == "avx2.psrl.dq.bs" ||
3092                         Name == "avx512.psrl.dq.512")) {
3093      // 128/256/512-bit shift right specified in bytes.
3094      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3095      Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3096    } else if (IsX86 && (Name == "sse41.pblendw" ||
3097                         Name.starts_with("sse41.blendp") ||
3098                         Name.starts_with("avx.blend.p") ||
3099                         Name == "avx2.pblendw" ||
3100                         Name.starts_with("avx2.pblendd."))) {
3101      Value *Op0 = CI->getArgOperand(0);
3102      Value *Op1 = CI->getArgOperand(1);
3103      unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3104      auto *VecTy = cast<FixedVectorType>(CI->getType());
3105      unsigned NumElts = VecTy->getNumElements();
3106
3107      SmallVector<int, 16> Idxs(NumElts);
3108      for (unsigned i = 0; i != NumElts; ++i)
3109        Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
3110
3111      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3112    } else if (IsX86 && (Name.starts_with("avx.vinsertf128.") ||
3113                         Name == "avx2.vinserti128" ||
3114                         Name.starts_with("avx512.mask.insert"))) {
3115      Value *Op0 = CI->getArgOperand(0);
3116      Value *Op1 = CI->getArgOperand(1);
3117      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3118      unsigned DstNumElts =
3119          cast<FixedVectorType>(CI->getType())->getNumElements();
3120      unsigned SrcNumElts =
3121          cast<FixedVectorType>(Op1->getType())->getNumElements();
3122      unsigned Scale = DstNumElts / SrcNumElts;
3123
3124      // Mask off the high bits of the immediate value; hardware ignores those.
3125      Imm = Imm % Scale;
3126
3127      // Extend the second operand into a vector the size of the destination.
3128      SmallVector<int, 8> Idxs(DstNumElts);
3129      for (unsigned i = 0; i != SrcNumElts; ++i)
3130        Idxs[i] = i;
3131      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3132        Idxs[i] = SrcNumElts;
3133      Rep = Builder.CreateShuffleVector(Op1, Idxs);
3134
3135      // Insert the second operand into the first operand.
3136
3137      // Note that there is no guarantee that instruction lowering will actually
3138      // produce a vinsertf128 instruction for the created shuffles. In
3139      // particular, the 0 immediate case involves no lane changes, so it can
3140      // be handled as a blend.
3141
3142      // Example of shuffle mask for 32-bit elements:
3143      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
3144      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
3145
3146      // First fill with identify mask.
3147      for (unsigned i = 0; i != DstNumElts; ++i)
3148        Idxs[i] = i;
3149      // Then replace the elements where we need to insert.
3150      for (unsigned i = 0; i != SrcNumElts; ++i)
3151        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3152      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3153
3154      // If the intrinsic has a mask operand, handle that.
3155      if (CI->arg_size() == 5)
3156        Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3157                            CI->getArgOperand(3));
3158    } else if (IsX86 && (Name.starts_with("avx.vextractf128.") ||
3159                         Name == "avx2.vextracti128" ||
3160                         Name.starts_with("avx512.mask.vextract"))) {
3161      Value *Op0 = CI->getArgOperand(0);
3162      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3163      unsigned DstNumElts =
3164          cast<FixedVectorType>(CI->getType())->getNumElements();
3165      unsigned SrcNumElts =
3166          cast<FixedVectorType>(Op0->getType())->getNumElements();
3167      unsigned Scale = SrcNumElts / DstNumElts;
3168
3169      // Mask off the high bits of the immediate value; hardware ignores those.
3170      Imm = Imm % Scale;
3171
3172      // Get indexes for the subvector of the input vector.
3173      SmallVector<int, 8> Idxs(DstNumElts);
3174      for (unsigned i = 0; i != DstNumElts; ++i) {
3175        Idxs[i] = i + (Imm * DstNumElts);
3176      }
3177      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3178
3179      // If the intrinsic has a mask operand, handle that.
3180      if (CI->arg_size() == 4)
3181        Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3182                            CI->getArgOperand(2));
3183    } else if (!IsX86 && Name == "stackprotectorcheck") {
3184      Rep = nullptr;
3185    } else if (IsX86 && (Name.starts_with("avx512.mask.perm.df.") ||
3186                         Name.starts_with("avx512.mask.perm.di."))) {
3187      Value *Op0 = CI->getArgOperand(0);
3188      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3189      auto *VecTy = cast<FixedVectorType>(CI->getType());
3190      unsigned NumElts = VecTy->getNumElements();
3191
3192      SmallVector<int, 8> Idxs(NumElts);
3193      for (unsigned i = 0; i != NumElts; ++i)
3194        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3195
3196      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3197
3198      if (CI->arg_size() == 4)
3199        Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3200                            CI->getArgOperand(2));
3201    } else if (IsX86 && (Name.starts_with("avx.vperm2f128.") ||
3202                         Name == "avx2.vperm2i128")) {
3203      // The immediate permute control byte looks like this:
3204      //    [1:0] - select 128 bits from sources for low half of destination
3205      //    [2]   - ignore
3206      //    [3]   - zero low half of destination
3207      //    [5:4] - select 128 bits from sources for high half of destination
3208      //    [6]   - ignore
3209      //    [7]   - zero high half of destination
3210
3211      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3212
3213      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3214      unsigned HalfSize = NumElts / 2;
3215      SmallVector<int, 8> ShuffleMask(NumElts);
3216
3217      // Determine which operand(s) are actually in use for this instruction.
3218      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3219      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3220
3221      // If needed, replace operands based on zero mask.
3222      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3223      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3224
3225      // Permute low half of result.
3226      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3227      for (unsigned i = 0; i < HalfSize; ++i)
3228        ShuffleMask[i] = StartIndex + i;
3229
3230      // Permute high half of result.
3231      StartIndex = (Imm & 0x10) ? HalfSize : 0;
3232      for (unsigned i = 0; i < HalfSize; ++i)
3233        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3234
3235      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3236
3237    } else if (IsX86 && (Name.starts_with("avx.vpermil.") ||
3238                         Name == "sse2.pshuf.d" ||
3239                         Name.starts_with("avx512.mask.vpermil.p") ||
3240                         Name.starts_with("avx512.mask.pshuf.d."))) {
3241      Value *Op0 = CI->getArgOperand(0);
3242      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3243      auto *VecTy = cast<FixedVectorType>(CI->getType());
3244      unsigned NumElts = VecTy->getNumElements();
3245      // Calculate the size of each index in the immediate.
3246      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3247      unsigned IdxMask = ((1 << IdxSize) - 1);
3248
3249      SmallVector<int, 8> Idxs(NumElts);
3250      // Lookup the bits for this element, wrapping around the immediate every
3251      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3252      // to offset by the first index of each group.
3253      for (unsigned i = 0; i != NumElts; ++i)
3254        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3255
3256      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3257
3258      if (CI->arg_size() == 4)
3259        Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3260                            CI->getArgOperand(2));
3261    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
3262                         Name.starts_with("avx512.mask.pshufl.w."))) {
3263      Value *Op0 = CI->getArgOperand(0);
3264      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3265      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3266
3267      SmallVector<int, 16> Idxs(NumElts);
3268      for (unsigned l = 0; l != NumElts; l += 8) {
3269        for (unsigned i = 0; i != 4; ++i)
3270          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3271        for (unsigned i = 4; i != 8; ++i)
3272          Idxs[i + l] = i + l;
3273      }
3274
3275      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3276
3277      if (CI->arg_size() == 4)
3278        Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3279                            CI->getArgOperand(2));
3280    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
3281                         Name.starts_with("avx512.mask.pshufh.w."))) {
3282      Value *Op0 = CI->getArgOperand(0);
3283      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3284      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3285
3286      SmallVector<int, 16> Idxs(NumElts);
3287      for (unsigned l = 0; l != NumElts; l += 8) {
3288        for (unsigned i = 0; i != 4; ++i)
3289          Idxs[i + l] = i + l;
3290        for (unsigned i = 0; i != 4; ++i)
3291          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3292      }
3293
3294      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3295
3296      if (CI->arg_size() == 4)
3297        Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3298                            CI->getArgOperand(2));
3299    } else if (IsX86 && Name.starts_with("avx512.mask.shuf.p")) {
3300      Value *Op0 = CI->getArgOperand(0);
3301      Value *Op1 = CI->getArgOperand(1);
3302      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3303      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3304
3305      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3306      unsigned HalfLaneElts = NumLaneElts / 2;
3307
3308      SmallVector<int, 16> Idxs(NumElts);
3309      for (unsigned i = 0; i != NumElts; ++i) {
3310        // Base index is the starting element of the lane.
3311        Idxs[i] = i - (i % NumLaneElts);
3312        // If we are half way through the lane switch to the other source.
3313        if ((i % NumLaneElts) >= HalfLaneElts)
3314          Idxs[i] += NumElts;
3315        // Now select the specific element. By adding HalfLaneElts bits from
3316        // the immediate. Wrapping around the immediate every 8-bits.
3317        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3318      }
3319
3320      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3321
3322      Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3323                          CI->getArgOperand(3));
3324    } else if (IsX86 && (Name.starts_with("avx512.mask.movddup") ||
3325                         Name.starts_with("avx512.mask.movshdup") ||
3326                         Name.starts_with("avx512.mask.movsldup"))) {
3327      Value *Op0 = CI->getArgOperand(0);
3328      unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3329      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3330
3331      unsigned Offset = 0;
3332      if (Name.starts_with("avx512.mask.movshdup."))
3333        Offset = 1;
3334
3335      SmallVector<int, 16> Idxs(NumElts);
3336      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3337        for (unsigned i = 0; i != NumLaneElts; i += 2) {
3338          Idxs[i + l + 0] = i + l + Offset;
3339          Idxs[i + l + 1] = i + l + Offset;
3340        }
3341
3342      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3343
3344      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3345                          CI->getArgOperand(1));
3346    } else if (IsX86 && (Name.starts_with("avx512.mask.punpckl") ||
3347                         Name.starts_with("avx512.mask.unpckl."))) {
3348      Value *Op0 = CI->getArgOperand(0);
3349      Value *Op1 = CI->getArgOperand(1);
3350      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3351      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3352
3353      SmallVector<int, 64> Idxs(NumElts);
3354      for (int l = 0; l != NumElts; l += NumLaneElts)
3355        for (int i = 0; i != NumLaneElts; ++i)
3356          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3357
3358      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3359
3360      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3361                          CI->getArgOperand(2));
3362    } else if (IsX86 && (Name.starts_with("avx512.mask.punpckh") ||
3363                         Name.starts_with("avx512.mask.unpckh."))) {
3364      Value *Op0 = CI->getArgOperand(0);
3365      Value *Op1 = CI->getArgOperand(1);
3366      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3367      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3368
3369      SmallVector<int, 64> Idxs(NumElts);
3370      for (int l = 0; l != NumElts; l += NumLaneElts)
3371        for (int i = 0; i != NumLaneElts; ++i)
3372          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3373
3374      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3375
3376      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3377                          CI->getArgOperand(2));
3378    } else if (IsX86 && (Name.starts_with("avx512.mask.and.") ||
3379                         Name.starts_with("avx512.mask.pand."))) {
3380      VectorType *FTy = cast<VectorType>(CI->getType());
3381      VectorType *ITy = VectorType::getInteger(FTy);
3382      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3383                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3384      Rep = Builder.CreateBitCast(Rep, FTy);
3385      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3386                          CI->getArgOperand(2));
3387    } else if (IsX86 && (Name.starts_with("avx512.mask.andn.") ||
3388                         Name.starts_with("avx512.mask.pandn."))) {
3389      VectorType *FTy = cast<VectorType>(CI->getType());
3390      VectorType *ITy = VectorType::getInteger(FTy);
3391      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3392      Rep = Builder.CreateAnd(Rep,
3393                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3394      Rep = Builder.CreateBitCast(Rep, FTy);
3395      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3396                          CI->getArgOperand(2));
3397    } else if (IsX86 && (Name.starts_with("avx512.mask.or.") ||
3398                         Name.starts_with("avx512.mask.por."))) {
3399      VectorType *FTy = cast<VectorType>(CI->getType());
3400      VectorType *ITy = VectorType::getInteger(FTy);
3401      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3402                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3403      Rep = Builder.CreateBitCast(Rep, FTy);
3404      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3405                          CI->getArgOperand(2));
3406    } else if (IsX86 && (Name.starts_with("avx512.mask.xor.") ||
3407                         Name.starts_with("avx512.mask.pxor."))) {
3408      VectorType *FTy = cast<VectorType>(CI->getType());
3409      VectorType *ITy = VectorType::getInteger(FTy);
3410      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3411                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3412      Rep = Builder.CreateBitCast(Rep, FTy);
3413      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3414                          CI->getArgOperand(2));
3415    } else if (IsX86 && Name.starts_with("avx512.mask.padd.")) {
3416      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3417      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3418                          CI->getArgOperand(2));
3419    } else if (IsX86 && Name.starts_with("avx512.mask.psub.")) {
3420      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3421      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3422                          CI->getArgOperand(2));
3423    } else if (IsX86 && Name.starts_with("avx512.mask.pmull.")) {
3424      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3425      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3426                          CI->getArgOperand(2));
3427    } else if (IsX86 && Name.starts_with("avx512.mask.add.p")) {
3428      if (Name.ends_with(".512")) {
3429        Intrinsic::ID IID;
3430        if (Name[17] == 's')
3431          IID = Intrinsic::x86_avx512_add_ps_512;
3432        else
3433          IID = Intrinsic::x86_avx512_add_pd_512;
3434
3435        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3436                                 { CI->getArgOperand(0), CI->getArgOperand(1),
3437                                   CI->getArgOperand(4) });
3438      } else {
3439        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3440      }
3441      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3442                          CI->getArgOperand(2));
3443    } else if (IsX86 && Name.starts_with("avx512.mask.div.p")) {
3444      if (Name.ends_with(".512")) {
3445        Intrinsic::ID IID;
3446        if (Name[17] == 's')
3447          IID = Intrinsic::x86_avx512_div_ps_512;
3448        else
3449          IID = Intrinsic::x86_avx512_div_pd_512;
3450
3451        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3452                                 { CI->getArgOperand(0), CI->getArgOperand(1),
3453                                   CI->getArgOperand(4) });
3454      } else {
3455        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3456      }
3457      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3458                          CI->getArgOperand(2));
3459    } else if (IsX86 && Name.starts_with("avx512.mask.mul.p")) {
3460      if (Name.ends_with(".512")) {
3461        Intrinsic::ID IID;
3462        if (Name[17] == 's')
3463          IID = Intrinsic::x86_avx512_mul_ps_512;
3464        else
3465          IID = Intrinsic::x86_avx512_mul_pd_512;
3466
3467        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3468                                 { CI->getArgOperand(0), CI->getArgOperand(1),
3469                                   CI->getArgOperand(4) });
3470      } else {
3471        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3472      }
3473      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3474                          CI->getArgOperand(2));
3475    } else if (IsX86 && Name.starts_with("avx512.mask.sub.p")) {
3476      if (Name.ends_with(".512")) {
3477        Intrinsic::ID IID;
3478        if (Name[17] == 's')
3479          IID = Intrinsic::x86_avx512_sub_ps_512;
3480        else
3481          IID = Intrinsic::x86_avx512_sub_pd_512;
3482
3483        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3484                                 { CI->getArgOperand(0), CI->getArgOperand(1),
3485                                   CI->getArgOperand(4) });
3486      } else {
3487        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3488      }
3489      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3490                          CI->getArgOperand(2));
3491    } else if (IsX86 && (Name.starts_with("avx512.mask.max.p") ||
3492                         Name.starts_with("avx512.mask.min.p")) &&
3493               Name.drop_front(18) == ".512") {
3494      bool IsDouble = Name[17] == 'd';
3495      bool IsMin = Name[13] == 'i';
3496      static const Intrinsic::ID MinMaxTbl[2][2] = {
3497        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3498        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3499      };
3500      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3501
3502      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3503                               { CI->getArgOperand(0), CI->getArgOperand(1),
3504                                 CI->getArgOperand(4) });
3505      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3506                          CI->getArgOperand(2));
3507    } else if (IsX86 && Name.starts_with("avx512.mask.lzcnt.")) {
3508      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3509                                                         Intrinsic::ctlz,
3510                                                         CI->getType()),
3511                               { CI->getArgOperand(0), Builder.getInt1(false) });
3512      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3513                          CI->getArgOperand(1));
3514    } else if (IsX86 && Name.starts_with("avx512.mask.psll")) {
3515      bool IsImmediate = Name[16] == 'i' ||
3516                         (Name.size() > 18 && Name[18] == 'i');
3517      bool IsVariable = Name[16] == 'v';
3518      char Size = Name[16] == '.' ? Name[17] :
3519                  Name[17] == '.' ? Name[18] :
3520                  Name[18] == '.' ? Name[19] :
3521                                    Name[20];
3522
3523      Intrinsic::ID IID;
3524      if (IsVariable && Name[17] != '.') {
3525        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3526          IID = Intrinsic::x86_avx2_psllv_q;
3527        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3528          IID = Intrinsic::x86_avx2_psllv_q_256;
3529        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3530          IID = Intrinsic::x86_avx2_psllv_d;
3531        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3532          IID = Intrinsic::x86_avx2_psllv_d_256;
3533        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3534          IID = Intrinsic::x86_avx512_psllv_w_128;
3535        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3536          IID = Intrinsic::x86_avx512_psllv_w_256;
3537        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3538          IID = Intrinsic::x86_avx512_psllv_w_512;
3539        else
3540          llvm_unreachable("Unexpected size");
3541      } else if (Name.ends_with(".128")) {
3542        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3543          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3544                            : Intrinsic::x86_sse2_psll_d;
3545        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3546          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3547                            : Intrinsic::x86_sse2_psll_q;
3548        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3549          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3550                            : Intrinsic::x86_sse2_psll_w;
3551        else
3552          llvm_unreachable("Unexpected size");
3553      } else if (Name.ends_with(".256")) {
3554        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3555          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3556                            : Intrinsic::x86_avx2_psll_d;
3557        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3558          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3559                            : Intrinsic::x86_avx2_psll_q;
3560        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3561          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3562                            : Intrinsic::x86_avx2_psll_w;
3563        else
3564          llvm_unreachable("Unexpected size");
3565      } else {
3566        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3567          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3568                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
3569                              Intrinsic::x86_avx512_psll_d_512;
3570        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3571          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3572                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
3573                              Intrinsic::x86_avx512_psll_q_512;
3574        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3575          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3576                            : Intrinsic::x86_avx512_psll_w_512;
3577        else
3578          llvm_unreachable("Unexpected size");
3579      }
3580
3581      Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3582    } else if (IsX86 && Name.starts_with("avx512.mask.psrl")) {
3583      bool IsImmediate = Name[16] == 'i' ||
3584                         (Name.size() > 18 && Name[18] == 'i');
3585      bool IsVariable = Name[16] == 'v';
3586      char Size = Name[16] == '.' ? Name[17] :
3587                  Name[17] == '.' ? Name[18] :
3588                  Name[18] == '.' ? Name[19] :
3589                                    Name[20];
3590
3591      Intrinsic::ID IID;
3592      if (IsVariable && Name[17] != '.') {
3593        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3594          IID = Intrinsic::x86_avx2_psrlv_q;
3595        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3596          IID = Intrinsic::x86_avx2_psrlv_q_256;
3597        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3598          IID = Intrinsic::x86_avx2_psrlv_d;
3599        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3600          IID = Intrinsic::x86_avx2_psrlv_d_256;
3601        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3602          IID = Intrinsic::x86_avx512_psrlv_w_128;
3603        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3604          IID = Intrinsic::x86_avx512_psrlv_w_256;
3605        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3606          IID = Intrinsic::x86_avx512_psrlv_w_512;
3607        else
3608          llvm_unreachable("Unexpected size");
3609      } else if (Name.ends_with(".128")) {
3610        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3611          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3612                            : Intrinsic::x86_sse2_psrl_d;
3613        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3614          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3615                            : Intrinsic::x86_sse2_psrl_q;
3616        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3617          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3618                            : Intrinsic::x86_sse2_psrl_w;
3619        else
3620          llvm_unreachable("Unexpected size");
3621      } else if (Name.ends_with(".256")) {
3622        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3623          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3624                            : Intrinsic::x86_avx2_psrl_d;
3625        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3626          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3627                            : Intrinsic::x86_avx2_psrl_q;
3628        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3629          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3630                            : Intrinsic::x86_avx2_psrl_w;
3631        else
3632          llvm_unreachable("Unexpected size");
3633      } else {
3634        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3635          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3636                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
3637                              Intrinsic::x86_avx512_psrl_d_512;
3638        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3639          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3640                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
3641                              Intrinsic::x86_avx512_psrl_q_512;
3642        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3643          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3644                            : Intrinsic::x86_avx512_psrl_w_512;
3645        else
3646          llvm_unreachable("Unexpected size");
3647      }
3648
3649      Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3650    } else if (IsX86 && Name.starts_with("avx512.mask.psra")) {
3651      bool IsImmediate = Name[16] == 'i' ||
3652                         (Name.size() > 18 && Name[18] == 'i');
3653      bool IsVariable = Name[16] == 'v';
3654      char Size = Name[16] == '.' ? Name[17] :
3655                  Name[17] == '.' ? Name[18] :
3656                  Name[18] == '.' ? Name[19] :
3657                                    Name[20];
3658
3659      Intrinsic::ID IID;
3660      if (IsVariable && Name[17] != '.') {
3661        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3662          IID = Intrinsic::x86_avx2_psrav_d;
3663        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3664          IID = Intrinsic::x86_avx2_psrav_d_256;
3665        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3666          IID = Intrinsic::x86_avx512_psrav_w_128;
3667        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3668          IID = Intrinsic::x86_avx512_psrav_w_256;
3669        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3670          IID = Intrinsic::x86_avx512_psrav_w_512;
3671        else
3672          llvm_unreachable("Unexpected size");
3673      } else if (Name.ends_with(".128")) {
3674        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3675          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3676                            : Intrinsic::x86_sse2_psra_d;
3677        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3678          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3679                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3680                              Intrinsic::x86_avx512_psra_q_128;
3681        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3682          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3683                            : Intrinsic::x86_sse2_psra_w;
3684        else
3685          llvm_unreachable("Unexpected size");
3686      } else if (Name.ends_with(".256")) {
3687        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3688          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3689                            : Intrinsic::x86_avx2_psra_d;
3690        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3691          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3692                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3693                              Intrinsic::x86_avx512_psra_q_256;
3694        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3695          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3696                            : Intrinsic::x86_avx2_psra_w;
3697        else
3698          llvm_unreachable("Unexpected size");
3699      } else {
3700        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3701          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3702                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3703                              Intrinsic::x86_avx512_psra_d_512;
3704        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3705          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3706                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3707                              Intrinsic::x86_avx512_psra_q_512;
3708        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3709          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3710                            : Intrinsic::x86_avx512_psra_w_512;
3711        else
3712          llvm_unreachable("Unexpected size");
3713      }
3714
3715      Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3716    } else if (IsX86 && Name.starts_with("avx512.mask.move.s")) {
3717      Rep = upgradeMaskedMove(Builder, *CI);
3718    } else if (IsX86 && Name.starts_with("avx512.cvtmask2")) {
3719      Rep = upgradeMaskToInt(Builder, *CI);
3720    } else if (IsX86 && Name.ends_with(".movntdqa")) {
3721      MDNode *Node = MDNode::get(
3722          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3723
3724      Value *Ptr = CI->getArgOperand(0);
3725
3726      // Convert the type of the pointer to a pointer to the stored type.
3727      Value *BC = Builder.CreateBitCast(
3728          Ptr, PointerType::getUnqual(CI->getType()), "cast");
3729      LoadInst *LI = Builder.CreateAlignedLoad(
3730          CI->getType(), BC,
3731          Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3732      LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3733      Rep = LI;
3734    } else if (IsX86 && (Name.starts_with("fma.vfmadd.") ||
3735                         Name.starts_with("fma.vfmsub.") ||
3736                         Name.starts_with("fma.vfnmadd.") ||
3737                         Name.starts_with("fma.vfnmsub."))) {
3738      bool NegMul = Name[6] == 'n';
3739      bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3740      bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3741
3742      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3743                       CI->getArgOperand(2) };
3744
3745      if (IsScalar) {
3746        Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3747        Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3748        Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3749      }
3750
3751      if (NegMul && !IsScalar)
3752        Ops[0] = Builder.CreateFNeg(Ops[0]);
3753      if (NegMul && IsScalar)
3754        Ops[1] = Builder.CreateFNeg(Ops[1]);
3755      if (NegAcc)
3756        Ops[2] = Builder.CreateFNeg(Ops[2]);
3757
3758      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3759                                                         Intrinsic::fma,
3760                                                         Ops[0]->getType()),
3761                               Ops);
3762
3763      if (IsScalar)
3764        Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3765                                          (uint64_t)0);
3766    } else if (IsX86 && Name.starts_with("fma4.vfmadd.s")) {
3767      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3768                       CI->getArgOperand(2) };
3769
3770      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3771      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3772      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3773
3774      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3775                                                         Intrinsic::fma,
3776                                                         Ops[0]->getType()),
3777                               Ops);
3778
3779      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3780                                        Rep, (uint64_t)0);
3781    } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.s") ||
3782                         Name.starts_with("avx512.maskz.vfmadd.s") ||
3783                         Name.starts_with("avx512.mask3.vfmadd.s") ||
3784                         Name.starts_with("avx512.mask3.vfmsub.s") ||
3785                         Name.starts_with("avx512.mask3.vfnmsub.s"))) {
3786      bool IsMask3 = Name[11] == '3';
3787      bool IsMaskZ = Name[11] == 'z';
3788      // Drop the "avx512.mask." to make it easier.
3789      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3790      bool NegMul = Name[2] == 'n';
3791      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3792
3793      Value *A = CI->getArgOperand(0);
3794      Value *B = CI->getArgOperand(1);
3795      Value *C = CI->getArgOperand(2);
3796
3797      if (NegMul && (IsMask3 || IsMaskZ))
3798        A = Builder.CreateFNeg(A);
3799      if (NegMul && !(IsMask3 || IsMaskZ))
3800        B = Builder.CreateFNeg(B);
3801      if (NegAcc)
3802        C = Builder.CreateFNeg(C);
3803
3804      A = Builder.CreateExtractElement(A, (uint64_t)0);
3805      B = Builder.CreateExtractElement(B, (uint64_t)0);
3806      C = Builder.CreateExtractElement(C, (uint64_t)0);
3807
3808      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3809          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3810        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3811
3812        Intrinsic::ID IID;
3813        if (Name.back() == 'd')
3814          IID = Intrinsic::x86_avx512_vfmadd_f64;
3815        else
3816          IID = Intrinsic::x86_avx512_vfmadd_f32;
3817        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3818        Rep = Builder.CreateCall(FMA, Ops);
3819      } else {
3820        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3821                                                  Intrinsic::fma,
3822                                                  A->getType());
3823        Rep = Builder.CreateCall(FMA, { A, B, C });
3824      }
3825
3826      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3827                        IsMask3 ? C : A;
3828
3829      // For Mask3 with NegAcc, we need to create a new extractelement that
3830      // avoids the negation above.
3831      if (NegAcc && IsMask3)
3832        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3833                                                (uint64_t)0);
3834
3835      Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3836      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3837                                        Rep, (uint64_t)0);
3838    } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.p") ||
3839                         Name.starts_with("avx512.mask.vfnmadd.p") ||
3840                         Name.starts_with("avx512.mask.vfnmsub.p") ||
3841                         Name.starts_with("avx512.mask3.vfmadd.p") ||
3842                         Name.starts_with("avx512.mask3.vfmsub.p") ||
3843                         Name.starts_with("avx512.mask3.vfnmsub.p") ||
3844                         Name.starts_with("avx512.maskz.vfmadd.p"))) {
3845      bool IsMask3 = Name[11] == '3';
3846      bool IsMaskZ = Name[11] == 'z';
3847      // Drop the "avx512.mask." to make it easier.
3848      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3849      bool NegMul = Name[2] == 'n';
3850      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3851
3852      Value *A = CI->getArgOperand(0);
3853      Value *B = CI->getArgOperand(1);
3854      Value *C = CI->getArgOperand(2);
3855
3856      if (NegMul && (IsMask3 || IsMaskZ))
3857        A = Builder.CreateFNeg(A);
3858      if (NegMul && !(IsMask3 || IsMaskZ))
3859        B = Builder.CreateFNeg(B);
3860      if (NegAcc)
3861        C = Builder.CreateFNeg(C);
3862
3863      if (CI->arg_size() == 5 &&
3864          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3865           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3866        Intrinsic::ID IID;
3867        // Check the character before ".512" in string.
3868        if (Name[Name.size()-5] == 's')
3869          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3870        else
3871          IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3872
3873        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3874                                 { A, B, C, CI->getArgOperand(4) });
3875      } else {
3876        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3877                                                  Intrinsic::fma,
3878                                                  A->getType());
3879        Rep = Builder.CreateCall(FMA, { A, B, C });
3880      }
3881
3882      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3883                        IsMask3 ? CI->getArgOperand(2) :
3884                                  CI->getArgOperand(0);
3885
3886      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3887    } else if (IsX86 &&  Name.starts_with("fma.vfmsubadd.p")) {
3888      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3889      unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3890      Intrinsic::ID IID;
3891      if (VecWidth == 128 && EltWidth == 32)
3892        IID = Intrinsic::x86_fma_vfmaddsub_ps;
3893      else if (VecWidth == 256 && EltWidth == 32)
3894        IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3895      else if (VecWidth == 128 && EltWidth == 64)
3896        IID = Intrinsic::x86_fma_vfmaddsub_pd;
3897      else if (VecWidth == 256 && EltWidth == 64)
3898        IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3899      else
3900        llvm_unreachable("Unexpected intrinsic");
3901
3902      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3903                       CI->getArgOperand(2) };
3904      Ops[2] = Builder.CreateFNeg(Ops[2]);
3905      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3906                               Ops);
3907    } else if (IsX86 && (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3908                         Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3909                         Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3910                         Name.starts_with("avx512.mask3.vfmsubadd.p"))) {
3911      bool IsMask3 = Name[11] == '3';
3912      bool IsMaskZ = Name[11] == 'z';
3913      // Drop the "avx512.mask." to make it easier.
3914      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3915      bool IsSubAdd = Name[3] == 's';
3916      if (CI->arg_size() == 5) {
3917        Intrinsic::ID IID;
3918        // Check the character before ".512" in string.
3919        if (Name[Name.size()-5] == 's')
3920          IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3921        else
3922          IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3923
3924        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3925                         CI->getArgOperand(2), CI->getArgOperand(4) };
3926        if (IsSubAdd)
3927          Ops[2] = Builder.CreateFNeg(Ops[2]);
3928
3929        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3930                                 Ops);
3931      } else {
3932        int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3933
3934        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3935                         CI->getArgOperand(2) };
3936
3937        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3938                                                  Ops[0]->getType());
3939        Value *Odd = Builder.CreateCall(FMA, Ops);
3940        Ops[2] = Builder.CreateFNeg(Ops[2]);
3941        Value *Even = Builder.CreateCall(FMA, Ops);
3942
3943        if (IsSubAdd)
3944          std::swap(Even, Odd);
3945
3946        SmallVector<int, 32> Idxs(NumElts);
3947        for (int i = 0; i != NumElts; ++i)
3948          Idxs[i] = i + (i % 2) * NumElts;
3949
3950        Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3951      }
3952
3953      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3954                        IsMask3 ? CI->getArgOperand(2) :
3955                                  CI->getArgOperand(0);
3956
3957      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3958    } else if (IsX86 && (Name.starts_with("avx512.mask.pternlog.") ||
3959                         Name.starts_with("avx512.maskz.pternlog."))) {
3960      bool ZeroMask = Name[11] == 'z';
3961      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3962      unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3963      Intrinsic::ID IID;
3964      if (VecWidth == 128 && EltWidth == 32)
3965        IID = Intrinsic::x86_avx512_pternlog_d_128;
3966      else if (VecWidth == 256 && EltWidth == 32)
3967        IID = Intrinsic::x86_avx512_pternlog_d_256;
3968      else if (VecWidth == 512 && EltWidth == 32)
3969        IID = Intrinsic::x86_avx512_pternlog_d_512;
3970      else if (VecWidth == 128 && EltWidth == 64)
3971        IID = Intrinsic::x86_avx512_pternlog_q_128;
3972      else if (VecWidth == 256 && EltWidth == 64)
3973        IID = Intrinsic::x86_avx512_pternlog_q_256;
3974      else if (VecWidth == 512 && EltWidth == 64)
3975        IID = Intrinsic::x86_avx512_pternlog_q_512;
3976      else
3977        llvm_unreachable("Unexpected intrinsic");
3978
3979      Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3980                        CI->getArgOperand(2), CI->getArgOperand(3) };
3981      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3982                               Args);
3983      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3984                                 : CI->getArgOperand(0);
3985      Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3986    } else if (IsX86 && (Name.starts_with("avx512.mask.vpmadd52") ||
3987                         Name.starts_with("avx512.maskz.vpmadd52"))) {
3988      bool ZeroMask = Name[11] == 'z';
3989      bool High = Name[20] == 'h' || Name[21] == 'h';
3990      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3991      Intrinsic::ID IID;
3992      if (VecWidth == 128 && !High)
3993        IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3994      else if (VecWidth == 256 && !High)
3995        IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3996      else if (VecWidth == 512 && !High)
3997        IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3998      else if (VecWidth == 128 && High)
3999        IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4000      else if (VecWidth == 256 && High)
4001        IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4002      else if (VecWidth == 512 && High)
4003        IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4004      else
4005        llvm_unreachable("Unexpected intrinsic");
4006
4007      Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
4008                        CI->getArgOperand(2) };
4009      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4010                               Args);
4011      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4012                                 : CI->getArgOperand(0);
4013      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4014    } else if (IsX86 && (Name.starts_with("avx512.mask.vpermi2var.") ||
4015                         Name.starts_with("avx512.mask.vpermt2var.") ||
4016                         Name.starts_with("avx512.maskz.vpermt2var."))) {
4017      bool ZeroMask = Name[11] == 'z';
4018      bool IndexForm = Name[17] == 'i';
4019      Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4020    } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpbusd.") ||
4021                         Name.starts_with("avx512.maskz.vpdpbusd.") ||
4022                         Name.starts_with("avx512.mask.vpdpbusds.") ||
4023                         Name.starts_with("avx512.maskz.vpdpbusds."))) {
4024      bool ZeroMask = Name[11] == 'z';
4025      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4026      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4027      Intrinsic::ID IID;
4028      if (VecWidth == 128 && !IsSaturating)
4029        IID = Intrinsic::x86_avx512_vpdpbusd_128;
4030      else if (VecWidth == 256 && !IsSaturating)
4031        IID = Intrinsic::x86_avx512_vpdpbusd_256;
4032      else if (VecWidth == 512 && !IsSaturating)
4033        IID = Intrinsic::x86_avx512_vpdpbusd_512;
4034      else if (VecWidth == 128 && IsSaturating)
4035        IID = Intrinsic::x86_avx512_vpdpbusds_128;
4036      else if (VecWidth == 256 && IsSaturating)
4037        IID = Intrinsic::x86_avx512_vpdpbusds_256;
4038      else if (VecWidth == 512 && IsSaturating)
4039        IID = Intrinsic::x86_avx512_vpdpbusds_512;
4040      else
4041        llvm_unreachable("Unexpected intrinsic");
4042
4043      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4044                        CI->getArgOperand(2)  };
4045      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4046                               Args);
4047      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4048                                 : CI->getArgOperand(0);
4049      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4050    } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpwssd.") ||
4051                         Name.starts_with("avx512.maskz.vpdpwssd.") ||
4052                         Name.starts_with("avx512.mask.vpdpwssds.") ||
4053                         Name.starts_with("avx512.maskz.vpdpwssds."))) {
4054      bool ZeroMask = Name[11] == 'z';
4055      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4056      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4057      Intrinsic::ID IID;
4058      if (VecWidth == 128 && !IsSaturating)
4059        IID = Intrinsic::x86_avx512_vpdpwssd_128;
4060      else if (VecWidth == 256 && !IsSaturating)
4061        IID = Intrinsic::x86_avx512_vpdpwssd_256;
4062      else if (VecWidth == 512 && !IsSaturating)
4063        IID = Intrinsic::x86_avx512_vpdpwssd_512;
4064      else if (VecWidth == 128 && IsSaturating)
4065        IID = Intrinsic::x86_avx512_vpdpwssds_128;
4066      else if (VecWidth == 256 && IsSaturating)
4067        IID = Intrinsic::x86_avx512_vpdpwssds_256;
4068      else if (VecWidth == 512 && IsSaturating)
4069        IID = Intrinsic::x86_avx512_vpdpwssds_512;
4070      else
4071        llvm_unreachable("Unexpected intrinsic");
4072
4073      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4074                        CI->getArgOperand(2)  };
4075      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4076                               Args);
4077      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4078                                 : CI->getArgOperand(0);
4079      Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4080    } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4081                         Name == "addcarry.u32" || Name == "addcarry.u64" ||
4082                         Name == "subborrow.u32" || Name == "subborrow.u64")) {
4083      Intrinsic::ID IID;
4084      if (Name[0] == 'a' && Name.back() == '2')
4085        IID = Intrinsic::x86_addcarry_32;
4086      else if (Name[0] == 'a' && Name.back() == '4')
4087        IID = Intrinsic::x86_addcarry_64;
4088      else if (Name[0] == 's' && Name.back() == '2')
4089        IID = Intrinsic::x86_subborrow_32;
4090      else if (Name[0] == 's' && Name.back() == '4')
4091        IID = Intrinsic::x86_subborrow_64;
4092      else
4093        llvm_unreachable("Unexpected intrinsic");
4094
4095      // Make a call with 3 operands.
4096      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4097                        CI->getArgOperand(2)};
4098      Value *NewCall = Builder.CreateCall(
4099                                Intrinsic::getDeclaration(CI->getModule(), IID),
4100                                Args);
4101
4102      // Extract the second result and store it.
4103      Value *Data = Builder.CreateExtractValue(NewCall, 1);
4104      // Cast the pointer to the right type.
4105      Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
4106                                 llvm::PointerType::getUnqual(Data->getType()));
4107      Builder.CreateAlignedStore(Data, Ptr, Align(1));
4108      // Replace the original call result with the first result of the new call.
4109      Value *CF = Builder.CreateExtractValue(NewCall, 0);
4110
4111      CI->replaceAllUsesWith(CF);
4112      Rep = nullptr;
4113    } else if (IsX86 && Name.starts_with("avx512.mask.") &&
4114               upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4115      // Rep will be updated by the call in the condition.
4116    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4117      Value *Arg = CI->getArgOperand(0);
4118      Value *Neg = Builder.CreateNeg(Arg, "neg");
4119      Value *Cmp = Builder.CreateICmpSGE(
4120          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4121      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4122    } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4123                          Name.starts_with("atomic.load.add.f64.p"))) {
4124      Value *Ptr = CI->getArgOperand(0);
4125      Value *Val = CI->getArgOperand(1);
4126      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4127                                    AtomicOrdering::SequentiallyConsistent);
4128    } else if (IsNVVM && Name.consume_front("max.") &&
4129               (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4130                Name == "ui" || Name == "ull")) {
4131      Value *Arg0 = CI->getArgOperand(0);
4132      Value *Arg1 = CI->getArgOperand(1);
4133      Value *Cmp = Name.starts_with("u")
4134                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4135                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4136      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4137    } else if (IsNVVM && Name.consume_front("min.") &&
4138               (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4139                Name == "ui" || Name == "ull")) {
4140      Value *Arg0 = CI->getArgOperand(0);
4141      Value *Arg1 = CI->getArgOperand(1);
4142      Value *Cmp = Name.starts_with("u")
4143                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4144                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4145      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4146    } else if (IsNVVM && Name == "clz.ll") {
4147      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4148      Value *Arg = CI->getArgOperand(0);
4149      Value *Ctlz = Builder.CreateCall(
4150          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4151                                    {Arg->getType()}),
4152          {Arg, Builder.getFalse()}, "ctlz");
4153      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4154    } else if (IsNVVM && Name == "popc.ll") {
4155      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4156      // i64.
4157      Value *Arg = CI->getArgOperand(0);
4158      Value *Popc = Builder.CreateCall(
4159          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4160                                    {Arg->getType()}),
4161          Arg, "ctpop");
4162      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4163    } else if (IsNVVM) {
4164      if (Name == "h2f") {
4165        Rep =
4166            Builder.CreateCall(Intrinsic::getDeclaration(
4167                                   F->getParent(), Intrinsic::convert_from_fp16,
4168                                   {Builder.getFloatTy()}),
4169                               CI->getArgOperand(0), "h2f");
4170      } else {
4171        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4172        if (IID != Intrinsic::not_intrinsic &&
4173            !F->getReturnType()->getScalarType()->isBFloatTy()) {
4174          rename(F);
4175          NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4176          SmallVector<Value *, 2> Args;
4177          for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4178            Value *Arg = CI->getArgOperand(I);
4179            Type *OldType = Arg->getType();
4180            Type *NewType = NewFn->getArg(I)->getType();
4181            Args.push_back((OldType->isIntegerTy() &&
4182                            NewType->getScalarType()->isBFloatTy())
4183                               ? Builder.CreateBitCast(Arg, NewType)
4184                               : Arg);
4185          }
4186          Rep = Builder.CreateCall(NewFn, Args);
4187          if (F->getReturnType()->isIntegerTy())
4188            Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4189        }
4190      }
4191    } else if (IsARM) {
4192      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4193    } else if (IsAMDGCN) {
4194      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4195    } else {
4196      llvm_unreachable("Unknown function for CallBase upgrade.");
4197    }
4198
4199    if (Rep)
4200      CI->replaceAllUsesWith(Rep);
4201    CI->eraseFromParent();
4202    return;
4203  }
4204
4205  const auto &DefaultCase = [&]() -> void {
4206    if (CI->getFunctionType() == NewFn->getFunctionType()) {
4207      // Handle generic mangling change.
4208      assert(
4209          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4210          "Unknown function for CallBase upgrade and isn't just a name change");
4211      CI->setCalledFunction(NewFn);
4212      return;
4213    }
4214
4215    // This must be an upgrade from a named to a literal struct.
4216    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4217      assert(OldST != NewFn->getReturnType() &&
4218             "Return type must have changed");
4219      assert(OldST->getNumElements() ==
4220                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4221             "Must have same number of elements");
4222
4223      SmallVector<Value *> Args(CI->args());
4224      Value *NewCI = Builder.CreateCall(NewFn, Args);
4225      Value *Res = PoisonValue::get(OldST);
4226      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4227        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4228        Res = Builder.CreateInsertValue(Res, Elem, Idx);
4229      }
4230      CI->replaceAllUsesWith(Res);
4231      CI->eraseFromParent();
4232      return;
4233    }
4234
4235    // We're probably about to produce something invalid. Let the verifier catch
4236    // it instead of dying here.
4237    CI->setCalledOperand(
4238        ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4239    return;
4240  };
4241  CallInst *NewCall = nullptr;
4242  switch (NewFn->getIntrinsicID()) {
4243  default: {
4244    DefaultCase();
4245    return;
4246  }
4247  case Intrinsic::arm_neon_vst1:
4248  case Intrinsic::arm_neon_vst2:
4249  case Intrinsic::arm_neon_vst3:
4250  case Intrinsic::arm_neon_vst4:
4251  case Intrinsic::arm_neon_vst2lane:
4252  case Intrinsic::arm_neon_vst3lane:
4253  case Intrinsic::arm_neon_vst4lane: {
4254    SmallVector<Value *, 4> Args(CI->args());
4255    NewCall = Builder.CreateCall(NewFn, Args);
4256    break;
4257  }
4258  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4259  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4260  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4261    LLVMContext &Ctx = F->getParent()->getContext();
4262    SmallVector<Value *, 4> Args(CI->args());
4263    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4264                               cast<ConstantInt>(Args[3])->getZExtValue());
4265    NewCall = Builder.CreateCall(NewFn, Args);
4266    break;
4267  }
4268  case Intrinsic::aarch64_sve_ld3_sret:
4269  case Intrinsic::aarch64_sve_ld4_sret:
4270  case Intrinsic::aarch64_sve_ld2_sret: {
4271    StringRef Name = F->getName();
4272    Name = Name.substr(5);
4273    unsigned N = StringSwitch<unsigned>(Name)
4274                     .StartsWith("aarch64.sve.ld2", 2)
4275                     .StartsWith("aarch64.sve.ld3", 3)
4276                     .StartsWith("aarch64.sve.ld4", 4)
4277                     .Default(0);
4278    ScalableVectorType *RetTy =
4279        dyn_cast<ScalableVectorType>(F->getReturnType());
4280    unsigned MinElts = RetTy->getMinNumElements() / N;
4281    SmallVector<Value *, 2> Args(CI->args());
4282    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4283    Value *Ret = llvm::PoisonValue::get(RetTy);
4284    for (unsigned I = 0; I < N; I++) {
4285      Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4286      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4287      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4288    }
4289    NewCall = dyn_cast<CallInst>(Ret);
4290    break;
4291  }
4292
4293  case Intrinsic::coro_end: {
4294    SmallVector<Value *, 3> Args(CI->args());
4295    Args.push_back(ConstantTokenNone::get(CI->getContext()));
4296    NewCall = Builder.CreateCall(NewFn, Args);
4297    break;
4298  }
4299
4300  case Intrinsic::vector_extract: {
4301    StringRef Name = F->getName();
4302    Name = Name.substr(5); // Strip llvm
4303    if (!Name.starts_with("aarch64.sve.tuple.get")) {
4304      DefaultCase();
4305      return;
4306    }
4307    ScalableVectorType *RetTy =
4308        dyn_cast<ScalableVectorType>(F->getReturnType());
4309    unsigned MinElts = RetTy->getMinNumElements();
4310    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4311    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4312    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4313    break;
4314  }
4315
4316  case Intrinsic::vector_insert: {
4317    StringRef Name = F->getName();
4318    Name = Name.substr(5);
4319    if (!Name.starts_with("aarch64.sve.tuple")) {
4320      DefaultCase();
4321      return;
4322    }
4323    if (Name.starts_with("aarch64.sve.tuple.set")) {
4324      unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4325      ScalableVectorType *Ty =
4326          dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4327      Value *NewIdx =
4328          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4329      NewCall = Builder.CreateCall(
4330          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4331      break;
4332    }
4333    if (Name.starts_with("aarch64.sve.tuple.create")) {
4334      unsigned N = StringSwitch<unsigned>(Name)
4335                       .StartsWith("aarch64.sve.tuple.create2", 2)
4336                       .StartsWith("aarch64.sve.tuple.create3", 3)
4337                       .StartsWith("aarch64.sve.tuple.create4", 4)
4338                       .Default(0);
4339      assert(N > 1 && "Create is expected to be between 2-4");
4340      ScalableVectorType *RetTy =
4341          dyn_cast<ScalableVectorType>(F->getReturnType());
4342      Value *Ret = llvm::PoisonValue::get(RetTy);
4343      unsigned MinElts = RetTy->getMinNumElements() / N;
4344      for (unsigned I = 0; I < N; I++) {
4345        Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4346        Value *V = CI->getArgOperand(I);
4347        Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4348      }
4349      NewCall = dyn_cast<CallInst>(Ret);
4350    }
4351    break;
4352  }
4353
4354  case Intrinsic::arm_neon_bfdot:
4355  case Intrinsic::arm_neon_bfmmla:
4356  case Intrinsic::arm_neon_bfmlalb:
4357  case Intrinsic::arm_neon_bfmlalt:
4358  case Intrinsic::aarch64_neon_bfdot:
4359  case Intrinsic::aarch64_neon_bfmmla:
4360  case Intrinsic::aarch64_neon_bfmlalb:
4361  case Intrinsic::aarch64_neon_bfmlalt: {
4362    SmallVector<Value *, 3> Args;
4363    assert(CI->arg_size() == 3 &&
4364           "Mismatch between function args and call args");
4365    size_t OperandWidth =
4366        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4367    assert((OperandWidth == 64 || OperandWidth == 128) &&
4368           "Unexpected operand width");
4369    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4370    auto Iter = CI->args().begin();
4371    Args.push_back(*Iter++);
4372    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4373    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4374    NewCall = Builder.CreateCall(NewFn, Args);
4375    break;
4376  }
4377
4378  case Intrinsic::bitreverse:
4379    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4380    break;
4381
4382  case Intrinsic::ctlz:
4383  case Intrinsic::cttz:
4384    assert(CI->arg_size() == 1 &&
4385           "Mismatch between function args and call args");
4386    NewCall =
4387        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4388    break;
4389
4390  case Intrinsic::objectsize: {
4391    Value *NullIsUnknownSize =
4392        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4393    Value *Dynamic =
4394        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4395    NewCall = Builder.CreateCall(
4396        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4397    break;
4398  }
4399
4400  case Intrinsic::ctpop:
4401    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4402    break;
4403
4404  case Intrinsic::convert_from_fp16:
4405    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4406    break;
4407
4408  case Intrinsic::dbg_value: {
4409    StringRef Name = F->getName();
4410    Name = Name.substr(5); // Strip llvm.
4411    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4412    if (Name.starts_with("dbg.addr")) {
4413      DIExpression *Expr = cast<DIExpression>(
4414          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4415      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4416      NewCall =
4417          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4418                                     MetadataAsValue::get(C, Expr)});
4419      break;
4420    }
4421
4422    // Upgrade from the old version that had an extra offset argument.
4423    assert(CI->arg_size() == 4);
4424    // Drop nonzero offsets instead of attempting to upgrade them.
4425    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4426      if (Offset->isZeroValue()) {
4427        NewCall = Builder.CreateCall(
4428            NewFn,
4429            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4430        break;
4431      }
4432    CI->eraseFromParent();
4433    return;
4434  }
4435
4436  case Intrinsic::ptr_annotation:
4437    // Upgrade from versions that lacked the annotation attribute argument.
4438    if (CI->arg_size() != 4) {
4439      DefaultCase();
4440      return;
4441    }
4442
4443    // Create a new call with an added null annotation attribute argument.
4444    NewCall =
4445        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4446                                   CI->getArgOperand(2), CI->getArgOperand(3),
4447                                   Constant::getNullValue(Builder.getPtrTy())});
4448    NewCall->takeName(CI);
4449    CI->replaceAllUsesWith(NewCall);
4450    CI->eraseFromParent();
4451    return;
4452
4453  case Intrinsic::var_annotation:
4454    // Upgrade from versions that lacked the annotation attribute argument.
4455    if (CI->arg_size() != 4) {
4456      DefaultCase();
4457      return;
4458    }
4459    // Create a new call with an added null annotation attribute argument.
4460    NewCall =
4461        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4462                                   CI->getArgOperand(2), CI->getArgOperand(3),
4463                                   Constant::getNullValue(Builder.getPtrTy())});
4464    NewCall->takeName(CI);
4465    CI->replaceAllUsesWith(NewCall);
4466    CI->eraseFromParent();
4467    return;
4468
4469  case Intrinsic::riscv_aes32dsi:
4470  case Intrinsic::riscv_aes32dsmi:
4471  case Intrinsic::riscv_aes32esi:
4472  case Intrinsic::riscv_aes32esmi:
4473  case Intrinsic::riscv_sm4ks:
4474  case Intrinsic::riscv_sm4ed: {
4475    // The last argument to these intrinsics used to be i8 and changed to i32.
4476    // The type overload for sm4ks and sm4ed was removed.
4477    Value *Arg2 = CI->getArgOperand(2);
4478    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4479      return;
4480
4481    Value *Arg0 = CI->getArgOperand(0);
4482    Value *Arg1 = CI->getArgOperand(1);
4483    if (CI->getType()->isIntegerTy(64)) {
4484      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4485      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4486    }
4487
4488    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4489                            cast<ConstantInt>(Arg2)->getZExtValue());
4490
4491    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4492    Value *Res = NewCall;
4493    if (Res->getType() != CI->getType())
4494      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4495    NewCall->takeName(CI);
4496    CI->replaceAllUsesWith(Res);
4497    CI->eraseFromParent();
4498    return;
4499  }
4500  case Intrinsic::riscv_sha256sig0:
4501  case Intrinsic::riscv_sha256sig1:
4502  case Intrinsic::riscv_sha256sum0:
4503  case Intrinsic::riscv_sha256sum1:
4504  case Intrinsic::riscv_sm3p0:
4505  case Intrinsic::riscv_sm3p1: {
4506    // The last argument to these intrinsics used to be i8 and changed to i32.
4507    // The type overload for sm4ks and sm4ed was removed.
4508    if (!CI->getType()->isIntegerTy(64))
4509      return;
4510
4511    Value *Arg =
4512        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4513
4514    NewCall = Builder.CreateCall(NewFn, Arg);
4515    Value *Res =
4516        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4517    NewCall->takeName(CI);
4518    CI->replaceAllUsesWith(Res);
4519    CI->eraseFromParent();
4520    return;
4521  }
4522
4523  case Intrinsic::x86_xop_vfrcz_ss:
4524  case Intrinsic::x86_xop_vfrcz_sd:
4525    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4526    break;
4527
4528  case Intrinsic::x86_xop_vpermil2pd:
4529  case Intrinsic::x86_xop_vpermil2ps:
4530  case Intrinsic::x86_xop_vpermil2pd_256:
4531  case Intrinsic::x86_xop_vpermil2ps_256: {
4532    SmallVector<Value *, 4> Args(CI->args());
4533    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4534    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4535    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4536    NewCall = Builder.CreateCall(NewFn, Args);
4537    break;
4538  }
4539
4540  case Intrinsic::x86_sse41_ptestc:
4541  case Intrinsic::x86_sse41_ptestz:
4542  case Intrinsic::x86_sse41_ptestnzc: {
4543    // The arguments for these intrinsics used to be v4f32, and changed
4544    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4545    // So, the only thing required is a bitcast for both arguments.
4546    // First, check the arguments have the old type.
4547    Value *Arg0 = CI->getArgOperand(0);
4548    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4549      return;
4550
4551    // Old intrinsic, add bitcasts
4552    Value *Arg1 = CI->getArgOperand(1);
4553
4554    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4555
4556    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4557    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4558
4559    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4560    break;
4561  }
4562
4563  case Intrinsic::x86_rdtscp: {
4564    // This used to take 1 arguments. If we have no arguments, it is already
4565    // upgraded.
4566    if (CI->getNumOperands() == 0)
4567      return;
4568
4569    NewCall = Builder.CreateCall(NewFn);
4570    // Extract the second result and store it.
4571    Value *Data = Builder.CreateExtractValue(NewCall, 1);
4572    // Cast the pointer to the right type.
4573    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4574                                 llvm::PointerType::getUnqual(Data->getType()));
4575    Builder.CreateAlignedStore(Data, Ptr, Align(1));
4576    // Replace the original call result with the first result of the new call.
4577    Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4578
4579    NewCall->takeName(CI);
4580    CI->replaceAllUsesWith(TSC);
4581    CI->eraseFromParent();
4582    return;
4583  }
4584
4585  case Intrinsic::x86_sse41_insertps:
4586  case Intrinsic::x86_sse41_dppd:
4587  case Intrinsic::x86_sse41_dpps:
4588  case Intrinsic::x86_sse41_mpsadbw:
4589  case Intrinsic::x86_avx_dp_ps_256:
4590  case Intrinsic::x86_avx2_mpsadbw: {
4591    // Need to truncate the last argument from i32 to i8 -- this argument models
4592    // an inherently 8-bit immediate operand to these x86 instructions.
4593    SmallVector<Value *, 4> Args(CI->args());
4594
4595    // Replace the last argument with a trunc.
4596    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4597    NewCall = Builder.CreateCall(NewFn, Args);
4598    break;
4599  }
4600
4601  case Intrinsic::x86_avx512_mask_cmp_pd_128:
4602  case Intrinsic::x86_avx512_mask_cmp_pd_256:
4603  case Intrinsic::x86_avx512_mask_cmp_pd_512:
4604  case Intrinsic::x86_avx512_mask_cmp_ps_128:
4605  case Intrinsic::x86_avx512_mask_cmp_ps_256:
4606  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4607    SmallVector<Value *, 4> Args(CI->args());
4608    unsigned NumElts =
4609        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4610    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4611
4612    NewCall = Builder.CreateCall(NewFn, Args);
4613    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4614
4615    NewCall->takeName(CI);
4616    CI->replaceAllUsesWith(Res);
4617    CI->eraseFromParent();
4618    return;
4619  }
4620
4621  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4622  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4623  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4624  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4625  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4626  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4627    SmallVector<Value *, 4> Args(CI->args());
4628    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4629    if (NewFn->getIntrinsicID() ==
4630        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4631      Args[1] = Builder.CreateBitCast(
4632          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4633
4634    NewCall = Builder.CreateCall(NewFn, Args);
4635    Value *Res = Builder.CreateBitCast(
4636        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4637
4638    NewCall->takeName(CI);
4639    CI->replaceAllUsesWith(Res);
4640    CI->eraseFromParent();
4641    return;
4642  }
4643  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4644  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4645  case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4646    SmallVector<Value *, 4> Args(CI->args());
4647    unsigned NumElts =
4648        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4649    Args[1] = Builder.CreateBitCast(
4650        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4651    Args[2] = Builder.CreateBitCast(
4652        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4653
4654    NewCall = Builder.CreateCall(NewFn, Args);
4655    break;
4656  }
4657
4658  case Intrinsic::thread_pointer: {
4659    NewCall = Builder.CreateCall(NewFn, {});
4660    break;
4661  }
4662
4663  case Intrinsic::memcpy:
4664  case Intrinsic::memmove:
4665  case Intrinsic::memset: {
4666    // We have to make sure that the call signature is what we're expecting.
4667    // We only want to change the old signatures by removing the alignment arg:
4668    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4669    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4670    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4671    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4672    // Note: i8*'s in the above can be any pointer type
4673    if (CI->arg_size() != 5) {
4674      DefaultCase();
4675      return;
4676    }
4677    // Remove alignment argument (3), and add alignment attributes to the
4678    // dest/src pointers.
4679    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4680                      CI->getArgOperand(2), CI->getArgOperand(4)};
4681    NewCall = Builder.CreateCall(NewFn, Args);
4682    AttributeList OldAttrs = CI->getAttributes();
4683    AttributeList NewAttrs = AttributeList::get(
4684        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4685        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4686         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4687    NewCall->setAttributes(NewAttrs);
4688    auto *MemCI = cast<MemIntrinsic>(NewCall);
4689    // All mem intrinsics support dest alignment.
4690    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4691    MemCI->setDestAlignment(Align->getMaybeAlignValue());
4692    // Memcpy/Memmove also support source alignment.
4693    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4694      MTI->setSourceAlignment(Align->getMaybeAlignValue());
4695    break;
4696  }
4697  }
4698  assert(NewCall && "Should have either set this variable or returned through "
4699                    "the default case");
4700  NewCall->takeName(CI);
4701  CI->replaceAllUsesWith(NewCall);
4702  CI->eraseFromParent();
4703}
4704
4705void llvm::UpgradeCallsToIntrinsic(Function *F) {
4706  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4707
4708  // Check if this function should be upgraded and get the replacement function
4709  // if there is one.
4710  Function *NewFn;
4711  if (UpgradeIntrinsicFunction(F, NewFn)) {
4712    // Replace all users of the old function with the new function or new
4713    // instructions. This is not a range loop because the call is deleted.
4714    for (User *U : make_early_inc_range(F->users()))
4715      if (CallBase *CB = dyn_cast<CallBase>(U))
4716        UpgradeIntrinsicCall(CB, NewFn);
4717
4718    // Remove old function, no longer used, from the module.
4719    F->eraseFromParent();
4720  }
4721}
4722
4723MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4724  const unsigned NumOperands = MD.getNumOperands();
4725  if (NumOperands == 0)
4726    return &MD; // Invalid, punt to a verifier error.
4727
4728  // Check if the tag uses struct-path aware TBAA format.
4729  if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4730    return &MD;
4731
4732  auto &Context = MD.getContext();
4733  if (NumOperands == 3) {
4734    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4735    MDNode *ScalarType = MDNode::get(Context, Elts);
4736    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4737    Metadata *Elts2[] = {ScalarType, ScalarType,
4738                         ConstantAsMetadata::get(
4739                             Constant::getNullValue(Type::getInt64Ty(Context))),
4740                         MD.getOperand(2)};
4741    return MDNode::get(Context, Elts2);
4742  }
4743  // Create a MDNode <MD, MD, offset 0>
4744  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4745                                    Type::getInt64Ty(Context)))};
4746  return MDNode::get(Context, Elts);
4747}
4748
4749Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4750                                      Instruction *&Temp) {
4751  if (Opc != Instruction::BitCast)
4752    return nullptr;
4753
4754  Temp = nullptr;
4755  Type *SrcTy = V->getType();
4756  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4757      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4758    LLVMContext &Context = V->getContext();
4759
4760    // We have no information about target data layout, so we assume that
4761    // the maximum pointer size is 64bit.
4762    Type *MidTy = Type::getInt64Ty(Context);
4763    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4764
4765    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4766  }
4767
4768  return nullptr;
4769}
4770
4771Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4772  if (Opc != Instruction::BitCast)
4773    return nullptr;
4774
4775  Type *SrcTy = C->getType();
4776  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4777      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4778    LLVMContext &Context = C->getContext();
4779
4780    // We have no information about target data layout, so we assume that
4781    // the maximum pointer size is 64bit.
4782    Type *MidTy = Type::getInt64Ty(Context);
4783
4784    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4785                                     DestTy);
4786  }
4787
4788  return nullptr;
4789}
4790
4791/// Check the debug info version number, if it is out-dated, drop the debug
4792/// info. Return true if module is modified.
4793bool llvm::UpgradeDebugInfo(Module &M) {
4794  if (DisableAutoUpgradeDebugInfo)
4795    return false;
4796
4797  unsigned Version = getDebugMetadataVersionFromModule(M);
4798  if (Version == DEBUG_METADATA_VERSION) {
4799    bool BrokenDebugInfo = false;
4800    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4801      report_fatal_error("Broken module found, compilation aborted!");
4802    if (!BrokenDebugInfo)
4803      // Everything is ok.
4804      return false;
4805    else {
4806      // Diagnose malformed debug info.
4807      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4808      M.getContext().diagnose(Diag);
4809    }
4810  }
4811  bool Modified = StripDebugInfo(M);
4812  if (Modified && Version != DEBUG_METADATA_VERSION) {
4813    // Diagnose a version mismatch.
4814    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4815    M.getContext().diagnose(DiagVersion);
4816  }
4817  return Modified;
4818}
4819
4820/// This checks for objc retain release marker which should be upgraded. It
4821/// returns true if module is modified.
4822static bool upgradeRetainReleaseMarker(Module &M) {
4823  bool Changed = false;
4824  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4825  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4826  if (ModRetainReleaseMarker) {
4827    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4828    if (Op) {
4829      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4830      if (ID) {
4831        SmallVector<StringRef, 4> ValueComp;
4832        ID->getString().split(ValueComp, "#");
4833        if (ValueComp.size() == 2) {
4834          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4835          ID = MDString::get(M.getContext(), NewValue);
4836        }
4837        M.addModuleFlag(Module::Error, MarkerKey, ID);
4838        M.eraseNamedMetadata(ModRetainReleaseMarker);
4839        Changed = true;
4840      }
4841    }
4842  }
4843  return Changed;
4844}
4845
4846void llvm::UpgradeARCRuntime(Module &M) {
4847  // This lambda converts normal function calls to ARC runtime functions to
4848  // intrinsic calls.
4849  auto UpgradeToIntrinsic = [&](const char *OldFunc,
4850                                llvm::Intrinsic::ID IntrinsicFunc) {
4851    Function *Fn = M.getFunction(OldFunc);
4852
4853    if (!Fn)
4854      return;
4855
4856    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4857
4858    for (User *U : make_early_inc_range(Fn->users())) {
4859      CallInst *CI = dyn_cast<CallInst>(U);
4860      if (!CI || CI->getCalledFunction() != Fn)
4861        continue;
4862
4863      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4864      FunctionType *NewFuncTy = NewFn->getFunctionType();
4865      SmallVector<Value *, 2> Args;
4866
4867      // Don't upgrade the intrinsic if it's not valid to bitcast the return
4868      // value to the return type of the old function.
4869      if (NewFuncTy->getReturnType() != CI->getType() &&
4870          !CastInst::castIsValid(Instruction::BitCast, CI,
4871                                 NewFuncTy->getReturnType()))
4872        continue;
4873
4874      bool InvalidCast = false;
4875
4876      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4877        Value *Arg = CI->getArgOperand(I);
4878
4879        // Bitcast argument to the parameter type of the new function if it's
4880        // not a variadic argument.
4881        if (I < NewFuncTy->getNumParams()) {
4882          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4883          // to the parameter type of the new function.
4884          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4885                                     NewFuncTy->getParamType(I))) {
4886            InvalidCast = true;
4887            break;
4888          }
4889          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4890        }
4891        Args.push_back(Arg);
4892      }
4893
4894      if (InvalidCast)
4895        continue;
4896
4897      // Create a call instruction that calls the new function.
4898      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4899      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4900      NewCall->takeName(CI);
4901
4902      // Bitcast the return value back to the type of the old call.
4903      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4904
4905      if (!CI->use_empty())
4906        CI->replaceAllUsesWith(NewRetVal);
4907      CI->eraseFromParent();
4908    }
4909
4910    if (Fn->use_empty())
4911      Fn->eraseFromParent();
4912  };
4913
4914  // Unconditionally convert a call to "clang.arc.use" to a call to
4915  // "llvm.objc.clang.arc.use".
4916  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4917
4918  // Upgrade the retain release marker. If there is no need to upgrade
4919  // the marker, that means either the module is already new enough to contain
4920  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
4921  if (!upgradeRetainReleaseMarker(M))
4922    return;
4923
4924  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4925      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4926      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4927      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4928      {"objc_autoreleaseReturnValue",
4929       llvm::Intrinsic::objc_autoreleaseReturnValue},
4930      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4931      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4932      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4933      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4934      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4935      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4936      {"objc_release", llvm::Intrinsic::objc_release},
4937      {"objc_retain", llvm::Intrinsic::objc_retain},
4938      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4939      {"objc_retainAutoreleaseReturnValue",
4940       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4941      {"objc_retainAutoreleasedReturnValue",
4942       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4943      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4944      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4945      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4946      {"objc_unsafeClaimAutoreleasedReturnValue",
4947       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4948      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4949      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4950      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4951      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4952      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4953      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4954      {"objc_arc_annotation_topdown_bbstart",
4955       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4956      {"objc_arc_annotation_topdown_bbend",
4957       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4958      {"objc_arc_annotation_bottomup_bbstart",
4959       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4960      {"objc_arc_annotation_bottomup_bbend",
4961       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4962
4963  for (auto &I : RuntimeFuncs)
4964    UpgradeToIntrinsic(I.first, I.second);
4965}
4966
4967bool llvm::UpgradeModuleFlags(Module &M) {
4968  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4969  if (!ModFlags)
4970    return false;
4971
4972  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4973  bool HasSwiftVersionFlag = false;
4974  uint8_t SwiftMajorVersion, SwiftMinorVersion;
4975  uint32_t SwiftABIVersion;
4976  auto Int8Ty = Type::getInt8Ty(M.getContext());
4977  auto Int32Ty = Type::getInt32Ty(M.getContext());
4978
4979  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4980    MDNode *Op = ModFlags->getOperand(I);
4981    if (Op->getNumOperands() != 3)
4982      continue;
4983    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4984    if (!ID)
4985      continue;
4986    auto SetBehavior = [&](Module::ModFlagBehavior B) {
4987      Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
4988                              Type::getInt32Ty(M.getContext()), B)),
4989                          MDString::get(M.getContext(), ID->getString()),
4990                          Op->getOperand(2)};
4991      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4992      Changed = true;
4993    };
4994
4995    if (ID->getString() == "Objective-C Image Info Version")
4996      HasObjCFlag = true;
4997    if (ID->getString() == "Objective-C Class Properties")
4998      HasClassProperties = true;
4999    // Upgrade PIC from Error/Max to Min.
5000    if (ID->getString() == "PIC Level") {
5001      if (auto *Behavior =
5002              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5003        uint64_t V = Behavior->getLimitedValue();
5004        if (V == Module::Error || V == Module::Max)
5005          SetBehavior(Module::Min);
5006      }
5007    }
5008    // Upgrade "PIE Level" from Error to Max.
5009    if (ID->getString() == "PIE Level")
5010      if (auto *Behavior =
5011              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5012        if (Behavior->getLimitedValue() == Module::Error)
5013          SetBehavior(Module::Max);
5014
5015    // Upgrade branch protection and return address signing module flags. The
5016    // module flag behavior for these fields were Error and now they are Min.
5017    if (ID->getString() == "branch-target-enforcement" ||
5018        ID->getString().starts_with("sign-return-address")) {
5019      if (auto *Behavior =
5020              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5021        if (Behavior->getLimitedValue() == Module::Error) {
5022          Type *Int32Ty = Type::getInt32Ty(M.getContext());
5023          Metadata *Ops[3] = {
5024              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5025              Op->getOperand(1), Op->getOperand(2)};
5026          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5027          Changed = true;
5028        }
5029      }
5030    }
5031
5032    // Upgrade Objective-C Image Info Section. Removed the whitespce in the
5033    // section name so that llvm-lto will not complain about mismatching
5034    // module flags that is functionally the same.
5035    if (ID->getString() == "Objective-C Image Info Section") {
5036      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5037        SmallVector<StringRef, 4> ValueComp;
5038        Value->getString().split(ValueComp, " ");
5039        if (ValueComp.size() != 1) {
5040          std::string NewValue;
5041          for (auto &S : ValueComp)
5042            NewValue += S.str();
5043          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5044                              MDString::get(M.getContext(), NewValue)};
5045          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5046          Changed = true;
5047        }
5048      }
5049    }
5050
5051    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
5052    // If the higher bits are set, it adds new module flag for swift info.
5053    if (ID->getString() == "Objective-C Garbage Collection") {
5054      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5055      if (Md) {
5056        assert(Md->getValue() && "Expected non-empty metadata");
5057        auto Type = Md->getValue()->getType();
5058        if (Type == Int8Ty)
5059          continue;
5060        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5061        if ((Val & 0xff) != Val) {
5062          HasSwiftVersionFlag = true;
5063          SwiftABIVersion = (Val & 0xff00) >> 8;
5064          SwiftMajorVersion = (Val & 0xff000000) >> 24;
5065          SwiftMinorVersion = (Val & 0xff0000) >> 16;
5066        }
5067        Metadata *Ops[3] = {
5068          ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5069          Op->getOperand(1),
5070          ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5071        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5072        Changed = true;
5073      }
5074    }
5075  }
5076
5077  // "Objective-C Class Properties" is recently added for Objective-C. We
5078  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5079  // flag of value 0, so we can correclty downgrade this flag when trying to
5080  // link an ObjC bitcode without this module flag with an ObjC bitcode with
5081  // this module flag.
5082  if (HasObjCFlag && !HasClassProperties) {
5083    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5084                    (uint32_t)0);
5085    Changed = true;
5086  }
5087
5088  if (HasSwiftVersionFlag) {
5089    M.addModuleFlag(Module::Error, "Swift ABI Version",
5090                    SwiftABIVersion);
5091    M.addModuleFlag(Module::Error, "Swift Major Version",
5092                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
5093    M.addModuleFlag(Module::Error, "Swift Minor Version",
5094                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
5095    Changed = true;
5096  }
5097
5098  return Changed;
5099}
5100
5101void llvm::UpgradeSectionAttributes(Module &M) {
5102  auto TrimSpaces = [](StringRef Section) -> std::string {
5103    SmallVector<StringRef, 5> Components;
5104    Section.split(Components, ',');
5105
5106    SmallString<32> Buffer;
5107    raw_svector_ostream OS(Buffer);
5108
5109    for (auto Component : Components)
5110      OS << ',' << Component.trim();
5111
5112    return std::string(OS.str().substr(1));
5113  };
5114
5115  for (auto &GV : M.globals()) {
5116    if (!GV.hasSection())
5117      continue;
5118
5119    StringRef Section = GV.getSection();
5120
5121    if (!Section.starts_with("__DATA, __objc_catlist"))
5122      continue;
5123
5124    // __DATA, __objc_catlist, regular, no_dead_strip
5125    // __DATA,__objc_catlist,regular,no_dead_strip
5126    GV.setSection(TrimSpaces(Section));
5127  }
5128}
5129
5130namespace {
5131// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5132// callsites within a function that did not also have the strictfp attribute.
5133// Since 10.0, if strict FP semantics are needed within a function, the
5134// function must have the strictfp attribute and all calls within the function
5135// must also have the strictfp attribute. This latter restriction is
5136// necessary to prevent unwanted libcall simplification when a function is
5137// being cloned (such as for inlining).
5138//
5139// The "dangling" strictfp attribute usage was only used to prevent constant
5140// folding and other libcall simplification. The nobuiltin attribute on the
5141// callsite has the same effect.
5142struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5143  StrictFPUpgradeVisitor() = default;
5144
5145  void visitCallBase(CallBase &Call) {
5146    if (!Call.isStrictFP())
5147      return;
5148    if (isa<ConstrainedFPIntrinsic>(&Call))
5149      return;
5150    // If we get here, the caller doesn't have the strictfp attribute
5151    // but this callsite does. Replace the strictfp attribute with nobuiltin.
5152    Call.removeFnAttr(Attribute::StrictFP);
5153    Call.addFnAttr(Attribute::NoBuiltin);
5154  }
5155};
5156} // namespace
5157
5158void llvm::UpgradeFunctionAttributes(Function &F) {
5159  // If a function definition doesn't have the strictfp attribute,
5160  // convert any callsite strictfp attributes to nobuiltin.
5161  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5162    StrictFPUpgradeVisitor SFPV;
5163    SFPV.visit(F);
5164  }
5165
5166  // Remove all incompatibile attributes from function.
5167  F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5168  for (auto &Arg : F.args())
5169    Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5170}
5171
5172static bool isOldLoopArgument(Metadata *MD) {
5173  auto *T = dyn_cast_or_null<MDTuple>(MD);
5174  if (!T)
5175    return false;
5176  if (T->getNumOperands() < 1)
5177    return false;
5178  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5179  if (!S)
5180    return false;
5181  return S->getString().starts_with("llvm.vectorizer.");
5182}
5183
5184static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5185  StringRef OldPrefix = "llvm.vectorizer.";
5186  assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5187
5188  if (OldTag == "llvm.vectorizer.unroll")
5189    return MDString::get(C, "llvm.loop.interleave.count");
5190
5191  return MDString::get(
5192      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5193             .str());
5194}
5195
5196static Metadata *upgradeLoopArgument(Metadata *MD) {
5197  auto *T = dyn_cast_or_null<MDTuple>(MD);
5198  if (!T)
5199    return MD;
5200  if (T->getNumOperands() < 1)
5201    return MD;
5202  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5203  if (!OldTag)
5204    return MD;
5205  if (!OldTag->getString().starts_with("llvm.vectorizer."))
5206    return MD;
5207
5208  // This has an old tag.  Upgrade it.
5209  SmallVector<Metadata *, 8> Ops;
5210  Ops.reserve(T->getNumOperands());
5211  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5212  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5213    Ops.push_back(T->getOperand(I));
5214
5215  return MDTuple::get(T->getContext(), Ops);
5216}
5217
5218MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5219  auto *T = dyn_cast<MDTuple>(&N);
5220  if (!T)
5221    return &N;
5222
5223  if (none_of(T->operands(), isOldLoopArgument))
5224    return &N;
5225
5226  SmallVector<Metadata *, 8> Ops;
5227  Ops.reserve(T->getNumOperands());
5228  for (Metadata *MD : T->operands())
5229    Ops.push_back(upgradeLoopArgument(MD));
5230
5231  return MDTuple::get(T->getContext(), Ops);
5232}
5233
5234std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5235  Triple T(TT);
5236  // The only data layout upgrades needed for pre-GCN are setting the address
5237  // space of globals to 1.
5238  if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
5239      !DL.starts_with("G")) {
5240    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5241  }
5242
5243  if (T.isRISCV64()) {
5244    // Make i32 a native type for 64-bit RISC-V.
5245    auto I = DL.find("-n64-");
5246    if (I != StringRef::npos)
5247      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5248    return DL.str();
5249  }
5250
5251  std::string Res = DL.str();
5252  // AMDGCN data layout upgrades.
5253  if (T.isAMDGCN()) {
5254    // Define address spaces for constants.
5255    if (!DL.contains("-G") && !DL.starts_with("G"))
5256      Res.append(Res.empty() ? "G1" : "-G1");
5257
5258    // Add missing non-integral declarations.
5259    // This goes before adding new address spaces to prevent incoherent string
5260    // values.
5261    if (!DL.contains("-ni") && !DL.starts_with("ni"))
5262      Res.append("-ni:7:8:9");
5263    // Update ni:7 to ni:7:8:9.
5264    if (DL.ends_with("ni:7"))
5265      Res.append(":8:9");
5266    if (DL.ends_with("ni:7:8"))
5267      Res.append(":9");
5268
5269    // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5270    // resources) An empty data layout has already been upgraded to G1 by now.
5271    if (!DL.contains("-p7") && !DL.starts_with("p7"))
5272      Res.append("-p7:160:256:256:32");
5273    if (!DL.contains("-p8") && !DL.starts_with("p8"))
5274      Res.append("-p8:128:128");
5275    if (!DL.contains("-p9") && !DL.starts_with("p9"))
5276      Res.append("-p9:192:256:256:32");
5277
5278    return Res;
5279  }
5280
5281  if (!T.isX86())
5282    return Res;
5283
5284  // If the datalayout matches the expected format, add pointer size address
5285  // spaces to the datalayout.
5286  std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5287  if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
5288    SmallVector<StringRef, 4> Groups;
5289    Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5290    if (R.match(Res, &Groups))
5291      Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5292  }
5293
5294  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5295  // for i128 operations prior to this being reflected in the data layout, and
5296  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5297  // boundaries, so although this is a breaking change, the upgrade is expected
5298  // to fix more IR than it breaks.
5299  // Intel MCU is an exception and uses 4-byte-alignment.
5300  if (!T.isOSIAMCU()) {
5301    std::string I128 = "-i128:128";
5302    if (StringRef Ref = Res; !Ref.contains(I128)) {
5303      SmallVector<StringRef, 4> Groups;
5304      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5305      if (R.match(Res, &Groups))
5306        Res = (Groups[1] + I128 + Groups[3]).str();
5307    }
5308  }
5309
5310  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5311  // Raising the alignment is safe because Clang did not produce f80 values in
5312  // the MSVC environment before this upgrade was added.
5313  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5314    StringRef Ref = Res;
5315    auto I = Ref.find("-f80:32-");
5316    if (I != StringRef::npos)
5317      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5318  }
5319
5320  return Res;
5321}
5322
5323void llvm::UpgradeAttributes(AttrBuilder &B) {
5324  StringRef FramePointer;
5325  Attribute A = B.getAttribute("no-frame-pointer-elim");
5326  if (A.isValid()) {
5327    // The value can be "true" or "false".
5328    FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5329    B.removeAttribute("no-frame-pointer-elim");
5330  }
5331  if (B.contains("no-frame-pointer-elim-non-leaf")) {
5332    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5333    if (FramePointer != "all")
5334      FramePointer = "non-leaf";
5335    B.removeAttribute("no-frame-pointer-elim-non-leaf");
5336  }
5337  if (!FramePointer.empty())
5338    B.addAttribute("frame-pointer", FramePointer);
5339
5340  A = B.getAttribute("null-pointer-is-valid");
5341  if (A.isValid()) {
5342    // The value can be "true" or "false".
5343    bool NullPointerIsValid = A.getValueAsString() == "true";
5344    B.removeAttribute("null-pointer-is-valid");
5345    if (NullPointerIsValid)
5346      B.addAttribute(Attribute::NullPointerIsValid);
5347  }
5348}
5349
5350void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5351  // clang.arc.attachedcall bundles are now required to have an operand.
5352  // If they don't, it's okay to drop them entirely: when there is an operand,
5353  // the "attachedcall" is meaningful and required, but without an operand,
5354  // it's just a marker NOP.  Dropping it merely prevents an optimization.
5355  erase_if(Bundles, [&](OperandBundleDef &OBD) {
5356    return OBD.getTag() == "clang.arc.attachedcall" &&
5357           OBD.inputs().empty();
5358  });
5359}
5360