//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
     F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matched below should be annotated with the LLVM
  // version that started auto-upgrading them. At some point in the future we
  // would like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
76  if (Name == "addcarryx.u32" || // Added in 8.0
77      Name == "addcarryx.u64" || // Added in 8.0
78      Name == "addcarry.u32" || // Added in 8.0
79      Name == "addcarry.u64" || // Added in 8.0
80      Name == "subborrow.u32" || // Added in 8.0
81      Name == "subborrow.u64" || // Added in 8.0
82      Name.startswith("sse2.padds.") || // Added in 8.0
83      Name.startswith("sse2.psubs.") || // Added in 8.0
84      Name.startswith("sse2.paddus.") || // Added in 8.0
85      Name.startswith("sse2.psubus.") || // Added in 8.0
86      Name.startswith("avx2.padds.") || // Added in 8.0
87      Name.startswith("avx2.psubs.") || // Added in 8.0
88      Name.startswith("avx2.paddus.") || // Added in 8.0
89      Name.startswith("avx2.psubus.") || // Added in 8.0
90      Name.startswith("avx512.padds.") || // Added in 8.0
91      Name.startswith("avx512.psubs.") || // Added in 8.0
92      Name.startswith("avx512.mask.padds.") || // Added in 8.0
93      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
94      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
95      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
96      Name=="ssse3.pabs.b.128" || // Added in 6.0
97      Name=="ssse3.pabs.w.128" || // Added in 6.0
98      Name=="ssse3.pabs.d.128" || // Added in 6.0
99      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
100      Name.startswith("fma.vfmadd.") || // Added in 7.0
101      Name.startswith("fma.vfmsub.") || // Added in 7.0
102      Name.startswith("fma.vfmaddsub.") || // Added in 7.0
103      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
104      Name.startswith("fma.vfnmadd.") || // Added in 7.0
105      Name.startswith("fma.vfnmsub.") || // Added in 7.0
106      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
107      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
108      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
109      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
110      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
111      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
112      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
113      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
114      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
115      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
116      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
117      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
118      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
119      Name.startswith("avx512.kunpck") || //added in 6.0
120      Name.startswith("avx2.pabs.") || // Added in 6.0
121      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
122      Name.startswith("avx512.broadcastm") || // Added in 6.0
123      Name == "sse.sqrt.ss" || // Added in 7.0
124      Name == "sse2.sqrt.sd" || // Added in 7.0
125      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
126      Name.startswith("avx.sqrt.p") || // Added in 7.0
127      Name.startswith("sse2.sqrt.p") || // Added in 7.0
128      Name.startswith("sse.sqrt.p") || // Added in 7.0
129      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
130      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
131      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
132      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
133      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
134      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
135      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
136      Name.startswith("avx.vperm2f128.") || // Added in 6.0
137      Name == "avx2.vperm2i128" || // Added in 6.0
138      Name == "sse.add.ss" || // Added in 4.0
139      Name == "sse2.add.sd" || // Added in 4.0
140      Name == "sse.sub.ss" || // Added in 4.0
141      Name == "sse2.sub.sd" || // Added in 4.0
142      Name == "sse.mul.ss" || // Added in 4.0
143      Name == "sse2.mul.sd" || // Added in 4.0
144      Name == "sse.div.ss" || // Added in 4.0
145      Name == "sse2.div.sd" || // Added in 4.0
146      Name == "sse41.pmaxsb" || // Added in 3.9
147      Name == "sse2.pmaxs.w" || // Added in 3.9
148      Name == "sse41.pmaxsd" || // Added in 3.9
149      Name == "sse2.pmaxu.b" || // Added in 3.9
150      Name == "sse41.pmaxuw" || // Added in 3.9
151      Name == "sse41.pmaxud" || // Added in 3.9
152      Name == "sse41.pminsb" || // Added in 3.9
153      Name == "sse2.pmins.w" || // Added in 3.9
154      Name == "sse41.pminsd" || // Added in 3.9
155      Name == "sse2.pminu.b" || // Added in 3.9
156      Name == "sse41.pminuw" || // Added in 3.9
157      Name == "sse41.pminud" || // Added in 3.9
158      Name == "avx512.kand.w" || // Added in 7.0
159      Name == "avx512.kandn.w" || // Added in 7.0
160      Name == "avx512.knot.w" || // Added in 7.0
161      Name == "avx512.kor.w" || // Added in 7.0
162      Name == "avx512.kxor.w" || // Added in 7.0
163      Name == "avx512.kxnor.w" || // Added in 7.0
164      Name == "avx512.kortestc.w" || // Added in 7.0
165      Name == "avx512.kortestz.w" || // Added in 7.0
166      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
167      Name.startswith("avx2.pmax") || // Added in 3.9
168      Name.startswith("avx2.pmin") || // Added in 3.9
169      Name.startswith("avx512.mask.pmax") || // Added in 4.0
170      Name.startswith("avx512.mask.pmin") || // Added in 4.0
171      Name.startswith("avx2.vbroadcast") || // Added in 3.8
172      Name.startswith("avx2.pbroadcast") || // Added in 3.8
173      Name.startswith("avx.vpermil.") || // Added in 3.1
174      Name.startswith("sse2.pshuf") || // Added in 3.9
175      Name.startswith("avx512.pbroadcast") || // Added in 3.9
176      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
177      Name.startswith("avx512.mask.movddup") || // Added in 3.9
178      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
179      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
180      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
181      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
182      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
183      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
184      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
185      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
186      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
187      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
188      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
189      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
190      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
191      Name.startswith("avx512.mask.pand.") || // Added in 3.9
192      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
193      Name.startswith("avx512.mask.por.") || // Added in 3.9
194      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
195      Name.startswith("avx512.mask.and.") || // Added in 3.9
196      Name.startswith("avx512.mask.andn.") || // Added in 3.9
197      Name.startswith("avx512.mask.or.") || // Added in 3.9
198      Name.startswith("avx512.mask.xor.") || // Added in 3.9
199      Name.startswith("avx512.mask.padd.") || // Added in 4.0
200      Name.startswith("avx512.mask.psub.") || // Added in 4.0
201      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
202      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
203      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
204      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
205      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
206      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
207      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
208      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
209      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
210      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
211      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
212      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
213      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
214      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
215      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
216      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
217      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
218      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
219      Name == "avx512.cvtusi2sd" || // Added in 7.0
220      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
221      Name == "sse2.pmulu.dq" || // Added in 7.0
222      Name == "sse41.pmuldq" || // Added in 7.0
223      Name == "avx2.pmulu.dq" || // Added in 7.0
224      Name == "avx2.pmul.dq" || // Added in 7.0
225      Name == "avx512.pmulu.dq.512" || // Added in 7.0
226      Name == "avx512.pmul.dq.512" || // Added in 7.0
227      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
228      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
229      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
230      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
231      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
232      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
233      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
234      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
235      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
236      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
237      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
238      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
239      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
240      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
241      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
242      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
243      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
244      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
245      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
246      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
247      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
248      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
249      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
250      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
251      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
252      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
253      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
254      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
255      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
256      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
257      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
258      Name.startswith("avx512.mask.pslli") || // Added in 4.0
259      Name.startswith("avx512.mask.psrai") || // Added in 4.0
260      Name.startswith("avx512.mask.psrli") || // Added in 4.0
261      Name.startswith("avx512.mask.psllv") || // Added in 4.0
262      Name.startswith("avx512.mask.psrav") || // Added in 4.0
263      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
264      Name.startswith("sse41.pmovsx") || // Added in 3.8
265      Name.startswith("sse41.pmovzx") || // Added in 3.9
266      Name.startswith("avx2.pmovsx") || // Added in 3.9
267      Name.startswith("avx2.pmovzx") || // Added in 3.9
268      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
269      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
270      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
271      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
272      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
273      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
274      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
275      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
276      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
277      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
278      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
279      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
280      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
281      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
282      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
283      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
284      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
285      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
286      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
287      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
288      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
289      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
290      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
291      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
292      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
293      Name.startswith("avx512.vpshld.") || // Added in 8.0
294      Name.startswith("avx512.vpshrd.") || // Added in 8.0
295      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
296      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
297      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
298      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
299      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
300      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
301      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
302      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
303      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
304      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
305      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
306      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
307      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
308      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
309      Name == "sse.cvtsi2ss" || // Added in 7.0
310      Name == "sse.cvtsi642ss" || // Added in 7.0
311      Name == "sse2.cvtsi2sd" || // Added in 7.0
312      Name == "sse2.cvtsi642sd" || // Added in 7.0
313      Name == "sse2.cvtss2sd" || // Added in 7.0
314      Name == "sse2.cvtdq2pd" || // Added in 3.9
315      Name == "sse2.cvtdq2ps" || // Added in 7.0
316      Name == "sse2.cvtps2pd" || // Added in 3.9
317      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
318      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
319      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
320      Name.startswith("avx.vinsertf128.") || // Added in 3.7
321      Name == "avx2.vinserti128" || // Added in 3.7
322      Name.startswith("avx512.mask.insert") || // Added in 4.0
323      Name.startswith("avx.vextractf128.") || // Added in 3.7
324      Name == "avx2.vextracti128" || // Added in 3.7
325      Name.startswith("avx512.mask.vextract") || // Added in 4.0
326      Name.startswith("sse4a.movnt.") || // Added in 3.9
327      Name.startswith("avx.movnt.") || // Added in 3.2
328      Name.startswith("avx512.storent.") || // Added in 3.9
329      Name == "sse41.movntdqa" || // Added in 5.0
330      Name == "avx2.movntdqa" || // Added in 5.0
331      Name == "avx512.movntdqa" || // Added in 5.0
332      Name == "sse2.storel.dq" || // Added in 3.9
333      Name.startswith("sse.storeu.") || // Added in 3.9
334      Name.startswith("sse2.storeu.") || // Added in 3.9
335      Name.startswith("avx.storeu.") || // Added in 3.9
336      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
337      Name.startswith("avx512.mask.store.p") || // Added in 3.9
338      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
339      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
340      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
341      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
342      Name == "avx512.mask.store.ss" || // Added in 7.0
343      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
344      Name.startswith("avx512.mask.load.") || // Added in 3.9
345      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
346      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
347      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
348      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
349      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
350      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
351      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
352      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
353      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
354      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
355      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
356      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
357      Name == "sse42.crc32.64.8" || // Added in 3.4
358      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
359      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
360      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
361      Name.startswith("avx512.mask.valign.") || // Added in 4.0
362      Name.startswith("sse2.psll.dq") || // Added in 3.7
363      Name.startswith("sse2.psrl.dq") || // Added in 3.7
364      Name.startswith("avx2.psll.dq") || // Added in 3.7
365      Name.startswith("avx2.psrl.dq") || // Added in 3.7
366      Name.startswith("avx512.psll.dq") || // Added in 3.9
367      Name.startswith("avx512.psrl.dq") || // Added in 3.9
368      Name == "sse41.pblendw" || // Added in 3.7
369      Name.startswith("sse41.blendp") || // Added in 3.7
370      Name.startswith("avx.blend.p") || // Added in 3.7
371      Name == "avx2.pblendw" || // Added in 3.7
372      Name.startswith("avx2.pblendd.") || // Added in 3.7
373      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
374      Name == "avx2.vbroadcasti128" || // Added in 3.7
375      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
376      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
377      Name == "xop.vpcmov" || // Added in 3.8
378      Name == "xop.vpcmov.256" || // Added in 5.0
379      Name.startswith("avx512.mask.move.s") || // Added in 4.0
380      Name.startswith("avx512.cvtmask2") || // Added in 5.0
381      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
382      Name.startswith("xop.vprot") || // Added in 8.0
383      Name.startswith("avx512.prol") || // Added in 8.0
384      Name.startswith("avx512.pror") || // Added in 8.0
385      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
386      Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
387      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
388      Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
389      Name.startswith("avx512.ptestm") || //Added in 6.0
390      Name.startswith("avx512.ptestnm") || //Added in 6.0
391      Name.startswith("avx512.mask.pavg")) // Added in 6.0
392    return true;
393
394  return false;
395}

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

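// Upgrade the declaration of an intrinsic function. Returns true if an
// upgrade is needed. When a direct declaration-for-declaration mapping
// exists, NewFn is set to the replacement; when the call sites themselves
// must be rewritten, NewFn is left null and the work happens later in
// UpgradeIntrinsicCall.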
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("arm.neon.vqadds.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqaddu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubs.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
      if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, Ty);
        return true;
      }
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'e': {
    SmallVector<StringRef, 2> Groups;
    static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
    if (R.match(Name, &Groups)) {
      Intrinsic::ID ID = Intrinsic::not_intrinsic;
      if (Groups[1] == "fadd")
        ID = Intrinsic::experimental_vector_reduce_v2_fadd;
      if (Groups[1] == "fmul")
        ID = Intrinsic::experimental_vector_reduce_v2_fmul;

      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }

    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space.
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    // Upgrade the memory intrinsics (memcpy/memmove/memset) that have an
    // explicit alignment parameter so that the alignment is instead carried
    // as an attribute on the pointer args.
    if (Name.startswith("memcpy.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memmove.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, src, and len
      ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
                                        ParamTypes);
      return true;
    }
    if (Name.startswith("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .StartsWith("atomic.load.add.f32.p", true)
                        .StartsWith("atomic.load.add.f64.p", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name == "prefetch") {
      // Handle address space overloading.
      Type *Tys[] = {F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getInt8PtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(
        EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
        Constant::getNullValue(IRB.getInt8PtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

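// Convert an integer mask argument (i8/i16/etc.) to a vector of i1 of the
// same bit width, then extract the low NumElts elements when the mask has
// more bits than the operation has lanes (e.g. an i8 mask for a 4-element
// operation).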
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                             cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have fewer than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}

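// Apply an integer mask to a vector result: select lane by lane between the
// result of the masked operation (Op0) and the passthru value (Op1). A
// constant all-ones mask is common and simply yields Op0.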
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

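// Scalar variant of EmitX86Select: only bit 0 of the mask is meaningful, so
// select on just that bit.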
static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  llvm::VectorType *MaskTy =
    llvm::VectorType::get(Builder.getInt1Ty(),
                          Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}

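// Upgrade the masked vpermi2var/vpermt2var intrinsics to the unmasked
// vpermi2var intrinsics plus an explicit select. The vpermt2 forms take
// their first two operands in the opposite order, so those are swapped to
// match the index form.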
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}

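// Upgrade the x86 saturating add/subtract intrinsics (padds/paddus/psubs/
// psubus) to the generic llvm.[su]add.sat/llvm.[su]sub.sat intrinsics,
// applying the mask operand as a select for the masked AVX512 forms.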
static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
                                            bool IsSigned, bool IsAddition) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);

  Intrinsic::ID IID =
      IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
               : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

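// Upgrade x86 rotate intrinsics to funnel shifts with both vector inputs
// equal to the source, i.e. rotl(x, amt) == fshl(x, x, amt) and
// rotr(x, amt) == fshr(x, x, amt).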
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
                               bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = Ty->getVectorNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});

  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

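// Upgrade the XOP vpcom/vpcomu intrinsics to a plain icmp + sext. The
// immediate selects the comparison predicate; immediates 6 and 7 encode
// always-false and always-true results.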
static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
                              bool IsSigned) {
  Type *Ty = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  return Ext;
}

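// Upgrade the concat-and-shift intrinsics (vpshld/vpshrd) to llvm.fshl /
// llvm.fshr. The two data operands are swapped for the right-shift forms,
// and any mask operand is applied as a select.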
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(Op0, Op1);

1210  // Amount may be scalar immediate, in which case create a splat vector.
1211  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1212  // we only care about the lowest log2 bits anyway.
1213  if (Amt->getType() != Ty) {
1214    unsigned NumElts = Ty->getVectorNumElements();
1215    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1216    Amt = Builder.CreateVectorSplat(NumElts, Amt);
1217  }
1218
1219  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1220  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1221  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1222
1223  unsigned NumArgs = CI.getNumArgOperands();
1224  if (NumArgs >= 4) { // For masked intrinsics.
1225    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1226                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1227                                   CI.getArgOperand(0);
1228    Value *Mask = CI.getOperand(NumArgs - 1);
1229    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1230  }
1231  return Res;
1232}
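
// E.g. (sketch) vpshld on <4 x i32> with a splatted amount %amt becomes
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                        <4 x i32> %amt)
// and vpshrd swaps the two data operands and uses @llvm.fshr instead.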

static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
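
// E.g. (sketch, modulo exact mangling) an unaligned masked store of
// <4 x i32> under an i8 mask %m becomes, after narrowing %m to <4 x i1>:
//   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %data,
//            <4 x i32>* %ptr, i32 1, <4 x i1> %mv)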

static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  Type *ValTy = Passthru->getType();
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
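
// E.g. (sketch, modulo exact mangling) the corresponding load form becomes
//   %r = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %ptr,
//            i32 1, <4 x i1> %mv, <4 x i32> %passthru)
// with an all-ones mask short-circuited to a plain load instruction.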

static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Value *Op0 = CI.getArgOperand(0);
  llvm::Type *Ty = Op0->getType();
  Value *Zero = llvm::Constant::getNullValue(Ty);
  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
  Value *Neg = Builder.CreateNeg(Op0);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);

  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));

  return Res;
}
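
// E.g. (sketch) pabsd on <4 x i32> becomes the usual compare/negate/select
// pattern:
//   %c = icmp sgt <4 x i32> %x, zeroinitializer
//   %n = sub <4 x i32> zeroinitializer, %x
//   %r = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %n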

static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
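
// E.g. (sketch) pmaxsd on <4 x i32> becomes
//   %c = icmp sgt <4 x i32> %a, %b
//   %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b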

static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
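
// E.g. (sketch) the unsigned case on <2 x i64> inputs reduces to
//   %la = and <2 x i64> %a, <i64 4294967295, i64 4294967295>
//   %lb = and <2 x i64> %b, <i64 4294967295, i64 4294967295>
//   %r  = mul <2 x i64> %la, %lb
// while the signed case sign-extends the low 32 bits with shl/ashr pairs
// before the multiply.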

// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
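
// E.g. (sketch) a <4 x i1> result is zero-padded to <8 x i1> by shuffling it
// with a null vector and then bitcast to i8, matching the scalar mask type:
//   %p = shufflevector <4 x i1> %v, <4 x i1> zeroinitializer,
//                      <8 x i32> <i32 0, i32 1, i32 2, i32 3,
//                                 i32 4, i32 5, i32 6, i32 7>
//   %r = bitcast <8 x i1> %p to i8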

static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
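
// E.g. (sketch) a signed "greater than" compare (CC == 6) on <4 x i32>
// becomes
//   %c = icmp sgt <4 x i32> %a, %b
// and the <4 x i1> result is then masked and widened to an i8 by
// ApplyX86MaskOn1BitsVec above.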

// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                 { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}

static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
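
// E.g. (sketch, illustrative operand names) a masked scalar move tests bit 0
// of the i8 mask and selects between lane 0 of %b and lane 0 of %src before
// reinserting into %a:
//   %bit = and i8 %mask, 1
//   %c   = icmp ne i8 %bit, 0
//   %e   = select i1 %c, double %b0, double %src0
//   %r   = insertelement <2 x double> %a, double %e, i64 0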

static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}

// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  unsigned NumArgs = CI.getNumArgOperands();
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
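
// E.g. (sketch) @llvm.x86.avx512.mask.max.ps.128(%a, %b, %src, %mask) is
// rewritten as
//   %r = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a, <4 x float> %b)
// followed by an element-wise select of %r against %src under %mask.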

/// Upgrade the comment in a call to inline asm that represents an Objective-C
/// retain/release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}
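
// E.g. an old-style marker string such as (sketch; the exact asm text varies)
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has the '#' that introduces the marker comment replaced with ';'.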

/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);

    if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx.movnt.") ||
                  Name.startswith("avx512.storent."))) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      VectorType *VTy = cast<VectorType>(Arg1->getType());
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
                                                 VTy->getBitWidth() / 8);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && Name == "sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("sse.storeu.") ||
                  Name.startswith("sse2.storeu.") ||
                  Name.startswith("avx.storeu."))) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && Name == "avx512.mask.store.ss") {
      Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         Mask, false);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx512.mask.store"))) {
      // "avx512.mask.storeu." or "avx512.mask.store."
      bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), Aligned);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (IsX86 && (Name.startswith("sse2.pcmp") ||
                  Name.startswith("avx2.pcmp"))) {
      // "sse2.pcmpeq.", "sse2.pcmpgt.", "avx2.pcmpeq.", or "avx2.pcmpgt."
      bool CmpEq = Name[9] == 'e';
      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                               CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
      Type *ExtTy = Type::getInt32Ty(C);
      if (CI->getOperand(0)->getType()->isIntegerTy(8))
        ExtTy = Type::getInt64Ty(C);
      unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
                         ExtTy->getPrimitiveSizeInBits();
      Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
      Rep = Builder.CreateVectorSplat(NumElts, Rep);
    } else if (IsX86 && (Name == "sse.sqrt.ss" ||
                         Name == "sse2.sqrt.sd")) {
      Value *Vec = CI->getArgOperand(0);
      Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
      Function *Intr = Intrinsic::getDeclaration(F->getParent(),
                                                 Intrinsic::sqrt, Elt0->getType());
      Elt0 = Builder.CreateCall(Intr, Elt0);
      Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
                         Name.startswith("sse2.sqrt.p") ||
                         Name.startswith("sse.sqrt.p"))) {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::sqrt,
                                                         CI->getType()),
                               {CI->getArgOperand(0)});
    } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
      if (CI->getNumArgOperands() == 4 &&
          (!isa<ConstantInt>(CI->getArgOperand(3)) ||
           cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                            : Intrinsic::x86_avx512_sqrt_pd_512;

        Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                           IID), Args);
      } else {
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                           Intrinsic::sqrt,
                                                           CI->getType()),
                                 {CI->getArgOperand(0)});
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
                         Name.startswith("avx512.ptestnm"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      Value *Mask = CI->getArgOperand(2);
      Rep = Builder.CreateAnd(Op0, Op1);
      llvm::Type *Ty = Op0->getType();
      Value *Zero = llvm::Constant::getNullValue(Ty);
      ICmpInst::Predicate Pred =
        Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      Rep = Builder.CreateICmp(Pred, Rep, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
    } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
      unsigned NumElts =
          CI->getArgOperand(1)->getType()->getVectorNumElements();
      Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
      unsigned NumElts = CI->getType()->getScalarSizeInBits();
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
      uint32_t Indices[64];
      for (unsigned i = 0; i != NumElts; ++i)
        Indices[i] = i;

      // First extract half of each vector. This gives better codegen than
      // doing it in a single shuffle.
      LHS = Builder.CreateShuffleVector(LHS, LHS,
                                        makeArrayRef(Indices, NumElts / 2));
      RHS = Builder.CreateShuffleVector(RHS, RHS,
                                        makeArrayRef(Indices, NumElts / 2));
      // Concat the vectors.
      // NOTE: Operands have to be swapped to match intrinsic definition.
      Rep = Builder.CreateShuffleVector(RHS, LHS,
                                        makeArrayRef(Indices, NumElts));
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kand.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kandn.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateAnd(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.kxnor.w") {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      LHS = Builder.CreateNot(LHS);
      Rep = Builder.CreateXor(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && Name == "avx512.knot.w") {
      Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Rep = Builder.CreateNot(Rep);
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 &&
               (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
      Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
      Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
      Rep = Builder.CreateOr(LHS, RHS);
      Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
      Value *C;
      if (Name[14] == 'c')
        C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
      else
        C = ConstantInt::getNullValue(Builder.getInt16Ty());
      Rep = Builder.CreateICmpEQ(Rep, C);
      Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
    } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
                         Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
                         Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
                         Name == "sse.div.ss" || Name == "sse2.div.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Value *EltOp;
      if (Name.contains(".add."))
        EltOp = Builder.CreateFAdd(Elt0, Elt1);
      else if (Name.contains(".sub."))
        EltOp = Builder.CreateFSub(Elt0, Elt1);
      else if (Name.contains(".mul."))
        EltOp = Builder.CreateFMul(Elt0, Elt1);
      else
        EltOp = Builder.CreateFDiv(Elt0, Elt1);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                        ConstantInt::get(I32Ty, 0));
    } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
      // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
      bool CmpEq = Name[16] == 'e';
      Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
    } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      switch (VecWidth) {
      default: llvm_unreachable("Unexpected intrinsic");
      case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
      case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
      case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
      }

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_fpclass_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_fpclass_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getOperand(0), CI->getArgOperand(1) });
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
      Type *OpTy = CI->getArgOperand(0)->getType();
      unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
      unsigned EltWidth = OpTy->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_cmp_ps_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_cmp_pd_512;
      else
        llvm_unreachable("Unexpected intrinsic");

      SmallVector<Value *, 4> Args;
      Args.push_back(CI->getArgOperand(0));
      Args.push_back(CI->getArgOperand(1));
      Args.push_back(CI->getArgOperand(2));
      if (CI->getNumArgOperands() == 5)
        Args.push_back(CI->getArgOperand(4));

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Args);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
    } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
               Name[16] != 'p') {
      // Integer compare intrinsics.
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
    } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                         Name.startswith("avx512.cvtw2mask.") ||
                         Name.startswith("avx512.cvtd2mask.") ||
                         Name.startswith("avx512.cvtq2mask."))) {
      Value *Op = CI->getArgOperand(0);
      Value *Zero = llvm::Constant::getNullValue(Op->getType());
      Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
    } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                         Name == "ssse3.pabs.w.128" ||
                         Name == "ssse3.pabs.d.128" ||
                         Name.startswith("avx2.pabs") ||
                         Name.startswith("avx512.mask.pabs"))) {
      Rep = upgradeAbs(Builder, *CI);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
                         Name.startswith("avx2.pmaxs") ||
                         Name.startswith("avx512.mask.pmaxs"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                         Name == "sse41.pmaxuw" ||
                         Name == "sse41.pmaxud" ||
                         Name.startswith("avx2.pmaxu") ||
                         Name.startswith("avx512.mask.pmaxu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    } else if (IsX86 && (Name == "sse41.pminsb" ||
                         Name == "sse2.pmins.w" ||
                         Name == "sse41.pminsd" ||
                         Name.startswith("avx2.pmins") ||
                         Name.startswith("avx512.mask.pmins"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    } else if (IsX86 && (Name == "sse2.pminu.b" ||
                         Name == "sse41.pminuw" ||
                         Name == "sse41.pminud" ||
                         Name.startswith("avx2.pminu") ||
                         Name.startswith("avx512.mask.pminu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
                         Name == "avx2.pmulu.dq" ||
                         Name == "avx512.pmulu.dq.512" ||
                         Name.startswith("avx512.mask.pmulu.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
    } else if (IsX86 && (Name == "sse41.pmuldq" ||
                         Name == "avx2.pmul.dq" ||
                         Name == "avx512.pmul.dq.512" ||
                         Name.startswith("avx512.mask.pmul.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
    } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
                         Name == "sse2.cvtsi2sd" ||
                         Name == "sse.cvtsi642ss" ||
                         Name == "sse2.cvtsi642sd")) {
      Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "avx512.cvtusi2sd") {
      Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "sse2.cvtss2sd") {
      Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
      Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                         Name == "sse2.cvtdq2ps" ||
                         Name == "avx.cvtdq2.pd.256" ||
                         Name == "avx.cvtdq2.ps.256" ||
                         Name.startswith("avx512.mask.cvtdq2pd.") ||
                         Name.startswith("avx512.mask.cvtudq2pd.") ||
                         Name.startswith("avx512.mask.cvtdq2ps.") ||
                         Name.startswith("avx512.mask.cvtudq2ps.") ||
                         Name.startswith("avx512.mask.cvtqq2pd.") ||
                         Name.startswith("avx512.mask.cvtuqq2pd.") ||
                         Name == "avx512.mask.cvtqq2ps.256" ||
                         Name == "avx512.mask.cvtqq2ps.512" ||
                         Name == "avx512.mask.cvtuqq2ps.256" ||
                         Name == "avx512.mask.cvtuqq2ps.512" ||
                         Name == "sse2.cvtps2pd" ||
                         Name == "avx.cvt.ps2.pd.256" ||
                         Name == "avx512.mask.cvtps2pd.128" ||
                         Name == "avx512.mask.cvtps2pd.256")) {
      Type *DstTy = CI->getType();
      Rep = CI->getArgOperand(0);
      Type *SrcTy = Rep->getType();

      unsigned NumDstElts = DstTy->getVectorNumElements();
      if (NumDstElts < SrcTy->getVectorNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        uint32_t ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
      }

      bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
      bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
      if (IsPS2PD)
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
      else if (CI->getNumArgOperands() == 4 &&
               (!isa<ConstantInt>(CI->getArgOperand(3)) ||
                cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                       : Intrinsic::x86_avx512_sitofp_round;
        Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
                                                { DstTy, SrcTy });
        Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
      } else {
        Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                         : Builder.CreateSIToFP(Rep, DstTy, "cvt");
      }

      if (CI->getNumArgOperands() >= 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
      Type *ResultTy = CI->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *ELd = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_expandload,
                                                ResultTy);
      Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
    } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
      Type *ResultTy = CI->getArgOperand(1)->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *CSt = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_compressstore,
                                                ResultTy);
      Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
    } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
                         Name.startswith("avx512.mask.expand."))) {
      Type *ResultTy = CI->getType();

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      bool IsCompress = Name[12] == 'c';
      Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                     : Intrinsic::x86_avx512_mask_expand;
      Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
      Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
                                       MaskVec });
    } else if (IsX86 && Name.startswith("xop.vpcom")) {
      bool IsSigned;
      if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
          Name.endswith("uq"))
        IsSigned = false;
      else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
               Name.endswith("q"))
        IsSigned = true;
      else
        llvm_unreachable("Unknown suffix");

      unsigned Imm;
      if (CI->getNumArgOperands() == 3) {
        Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      } else {
        Name = Name.substr(9); // strip off "xop.vpcom"
        if (Name.startswith("lt"))
          Imm = 0;
        else if (Name.startswith("le"))
          Imm = 1;
        else if (Name.startswith("gt"))
          Imm = 2;
        else if (Name.startswith("ge"))
          Imm = 3;
        else if (Name.startswith("eq"))
          Imm = 4;
        else if (Name.startswith("ne"))
          Imm = 5;
        else if (Name.startswith("false"))
          Imm = 6;
        else if (Name.startswith("true"))
          Imm = 7;
        else
          llvm_unreachable("Unknown condition");
      }

      Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
    } else if (IsX86 && Name.startswith("xop.vpcmov")) {
      Value *Sel = CI->getArgOperand(2);
      Value *NotSel = Builder.CreateNot(Sel);
      Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
      Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (IsX86 && (Name.startswith("xop.vprot") ||
                         Name.startswith("avx512.prol") ||
                         Name.startswith("avx512.mask.prol"))) {
      Rep = upgradeX86Rotate(Builder, *CI, false);
    } else if (IsX86 && (Name.startswith("avx512.pror") ||
                         Name.startswith("avx512.mask.pror"))) {
      Rep = upgradeX86Rotate(Builder, *CI, true);
    } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
                         Name.startswith("avx512.mask.vpshld") ||
                         Name.startswith("avx512.maskz.vpshld"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
    } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
                         Name.startswith("avx512.mask.vpshrd") ||
                         Name.startswith("avx512.maskz.vpshrd"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
    } else if (IsX86 && Name == "sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
                         Name.startswith("avx512.vbroadcast.s"))) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx") ||
                         Name.startswith("avx512.mask.pmovsx") ||
                         Name.startswith("avx512.mask.pmovzx"))) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (Name == "avx512.mask.pmov.qd.256" ||
               Name == "avx512.mask.pmov.qd.512" ||
               Name == "avx512.mask.pmov.wb.256" ||
               Name == "avx512.mask.pmov.wb.512") {
      Type *Ty = CI->getArgOperand(1)->getType();
      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = CI->getType()->getVectorElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      Type *VT = VectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 0, 1 });
      else
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 2, 3, 0, 1, 2, 3 });
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<uint32_t, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
2338    }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2339                         Name.startswith("avx512.mask.broadcasti"))) {
2340      unsigned NumSrcElts =
2341                        CI->getArgOperand(0)->getType()->getVectorNumElements();
2342      unsigned NumDstElts = CI->getType()->getVectorNumElements();
2343
2344      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2345      for (unsigned i = 0; i != NumDstElts; ++i)
2346        ShuffleMask[i] = i % NumSrcElts;
2347
2348      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2349                                        CI->getArgOperand(0),
2350                                        ShuffleMask);
2351      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2352                          CI->getArgOperand(1));
2353    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2354                         Name.startswith("avx2.vbroadcast") ||
2355                         Name.startswith("avx512.pbroadcast") ||
2356                         Name.startswith("avx512.mask.broadcast.s"))) {
2357      // Replace vp?broadcasts with a vector shuffle.
2358      Value *Op = CI->getArgOperand(0);
2359      unsigned NumElts = CI->getType()->getVectorNumElements();
2360      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2361      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2362                                        Constant::getNullValue(MaskTy));
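      // The all-zero mask splats element 0 of Op across every result lane.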
2363
2364      if (CI->getNumArgOperands() == 3)
2365        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2366                            CI->getArgOperand(1));
2367    } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2368                         Name.startswith("sse2.psubs.") ||
2369                         Name.startswith("avx2.padds.") ||
2370                         Name.startswith("avx2.psubs.") ||
2371                         Name.startswith("avx512.padds.") ||
2372                         Name.startswith("avx512.psubs.") ||
2373                         Name.startswith("avx512.mask.padds.") ||
2374                         Name.startswith("avx512.mask.psubs."))) {
2375      bool IsAdd = Name.contains(".padds");
2376      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2377    } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2378                         Name.startswith("sse2.psubus.") ||
2379                         Name.startswith("avx2.paddus.") ||
2380                         Name.startswith("avx2.psubus.") ||
2381                         Name.startswith("avx512.mask.paddus.") ||
2382                         Name.startswith("avx512.mask.psubus."))) {
2383      bool IsAdd = Name.contains(".paddus");
2384      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2385    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2386      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2387                                      CI->getArgOperand(1),
2388                                      CI->getArgOperand(2),
2389                                      CI->getArgOperand(3),
2390                                      CI->getArgOperand(4),
2391                                      false);
2392    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2393      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2394                                      CI->getArgOperand(1),
2395                                      CI->getArgOperand(2),
2396                                      CI->getArgOperand(3),
2397                                      CI->getArgOperand(4),
2398                                      true);
2399    } else if (IsX86 && (Name == "sse2.psll.dq" ||
2400                         Name == "avx2.psll.dq")) {
2401      // 128/256-bit shift left specified in bits.
2402      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2403      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2404                                       Shift / 8); // Shift is in bits.
2405    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2406                         Name == "avx2.psrl.dq")) {
2407      // 128/256-bit shift right specified in bits.
2408      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2409      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2410                                       Shift / 8); // Shift is in bits.
2411    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2412                         Name == "avx2.psll.dq.bs" ||
2413                         Name == "avx512.psll.dq.512")) {
2414      // 128/256/512-bit shift left specified in bytes.
2415      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2416      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2417    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2418                         Name == "avx2.psrl.dq.bs" ||
2419                         Name == "avx512.psrl.dq.512")) {
2420      // 128/256/512-bit shift right specified in bytes.
2421      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2422      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2423    } else if (IsX86 && (Name == "sse41.pblendw" ||
2424                         Name.startswith("sse41.blendp") ||
2425                         Name.startswith("avx.blend.p") ||
2426                         Name == "avx2.pblendw" ||
2427                         Name.startswith("avx2.pblendd."))) {
2428      Value *Op0 = CI->getArgOperand(0);
2429      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2431      VectorType *VecTy = cast<VectorType>(CI->getType());
2432      unsigned NumElts = VecTy->getNumElements();
2433
2434      SmallVector<uint32_t, 16> Idxs(NumElts);
2435      for (unsigned i = 0; i != NumElts; ++i)
2436        Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2437
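      // E.g. a v8f32 blend with Imm = 0x0f yields the mask <8,9,10,11,4,5,6,7>;
      // a set immediate bit selects the element from the second source.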
2438      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2439    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2440                         Name == "avx2.vinserti128" ||
2441                         Name.startswith("avx512.mask.insert"))) {
2442      Value *Op0 = CI->getArgOperand(0);
2443      Value *Op1 = CI->getArgOperand(1);
2444      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2445      unsigned DstNumElts = CI->getType()->getVectorNumElements();
2446      unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2447      unsigned Scale = DstNumElts / SrcNumElts;
2448
2449      // Mask off the high bits of the immediate value; hardware ignores those.
2450      Imm = Imm % Scale;
2451
2452      // Extend the second operand into a vector the size of the destination.
2453      Value *UndefV = UndefValue::get(Op1->getType());
2454      SmallVector<uint32_t, 8> Idxs(DstNumElts);
2455      for (unsigned i = 0; i != SrcNumElts; ++i)
2456        Idxs[i] = i;
2457      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2458        Idxs[i] = SrcNumElts;
2459      Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2460
2461      // Insert the second operand into the first operand.
2462
2463      // Note that there is no guarantee that instruction lowering will actually
2464      // produce a vinsertf128 instruction for the created shuffles. In
2465      // particular, the 0 immediate case involves no lane changes, so it can
2466      // be handled as a blend.
2467
2468      // Example of shuffle mask for 32-bit elements:
2469      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2470      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2471
      // First fill with identity mask.
2473      for (unsigned i = 0; i != DstNumElts; ++i)
2474        Idxs[i] = i;
2475      // Then replace the elements where we need to insert.
2476      for (unsigned i = 0; i != SrcNumElts; ++i)
2477        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2478      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2479
2480      // If the intrinsic has a mask operand, handle that.
2481      if (CI->getNumArgOperands() == 5)
2482        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2483                            CI->getArgOperand(3));
2484    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2485                         Name == "avx2.vextracti128" ||
2486                         Name.startswith("avx512.mask.vextract"))) {
2487      Value *Op0 = CI->getArgOperand(0);
2488      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2489      unsigned DstNumElts = CI->getType()->getVectorNumElements();
2490      unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2491      unsigned Scale = SrcNumElts / DstNumElts;
2492
2493      // Mask off the high bits of the immediate value; hardware ignores those.
2494      Imm = Imm % Scale;
2495
2496      // Get indexes for the subvector of the input vector.
2497      SmallVector<uint32_t, 8> Idxs(DstNumElts);
2498      for (unsigned i = 0; i != DstNumElts; ++i) {
2499        Idxs[i] = i + (Imm * DstNumElts);
2500      }
2501      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2502
2503      // If the intrinsic has a mask operand, handle that.
2504      if (CI->getNumArgOperands() == 4)
2505        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2506                            CI->getArgOperand(2));
2507    } else if (!IsX86 && Name == "stackprotectorcheck") {
2508      Rep = nullptr;
2509    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2510                         Name.startswith("avx512.mask.perm.di."))) {
2511      Value *Op0 = CI->getArgOperand(0);
2512      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2513      VectorType *VecTy = cast<VectorType>(CI->getType());
2514      unsigned NumElts = VecTy->getNumElements();
2515
2516      SmallVector<uint32_t, 8> Idxs(NumElts);
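      // Each group of four elements is permuted independently by 2-bit fields
      // of the immediate; e.g. Imm = 0x1b (00011011b) reverses every group:
      // <3, 2, 1, 0, ...>.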
2517      for (unsigned i = 0; i != NumElts; ++i)
2518        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2519
2520      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2521
2522      if (CI->getNumArgOperands() == 4)
2523        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2524                            CI->getArgOperand(2));
2525    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2526                         Name == "avx2.vperm2i128")) {
2527      // The immediate permute control byte looks like this:
2528      //    [1:0] - select 128 bits from sources for low half of destination
2529      //    [2]   - ignore
2530      //    [3]   - zero low half of destination
2531      //    [5:4] - select 128 bits from sources for high half of destination
2532      //    [6]   - ignore
2533      //    [7]   - zero high half of destination
2534
2535      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2536
2537      unsigned NumElts = CI->getType()->getVectorNumElements();
2538      unsigned HalfSize = NumElts / 2;
2539      SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2540
2541      // Determine which operand(s) are actually in use for this instruction.
2542      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2543      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2544
2545      // If needed, replace operands based on zero mask.
2546      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2547      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2548
2549      // Permute low half of result.
2550      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2551      for (unsigned i = 0; i < HalfSize; ++i)
2552        ShuffleMask[i] = StartIndex + i;
2553
2554      // Permute high half of result.
2555      StartIndex = (Imm & 0x10) ? HalfSize : 0;
2556      for (unsigned i = 0; i < HalfSize; ++i)
2557        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2558
2559      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2560
2561    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2562                         Name == "sse2.pshuf.d" ||
2563                         Name.startswith("avx512.mask.vpermil.p") ||
2564                         Name.startswith("avx512.mask.pshuf.d."))) {
2565      Value *Op0 = CI->getArgOperand(0);
2566      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2567      VectorType *VecTy = cast<VectorType>(CI->getType());
2568      unsigned NumElts = VecTy->getNumElements();
2569      // Calculate the size of each index in the immediate.
2570      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2571      unsigned IdxMask = ((1 << IdxSize) - 1);
2572
2573      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Look up the bits for this element, wrapping around the immediate every
      // 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
      // by the first index of each group.
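      // E.g. sse2.pshuf.d with Imm = 0x1b (00011011b) reverses the four
      // elements: <3, 2, 1, 0>.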
2577      for (unsigned i = 0; i != NumElts; ++i)
2578        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2579
2580      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2581
2582      if (CI->getNumArgOperands() == 4)
2583        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2584                            CI->getArgOperand(2));
2585    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2586                         Name.startswith("avx512.mask.pshufl.w."))) {
2587      Value *Op0 = CI->getArgOperand(0);
2588      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2589      unsigned NumElts = CI->getType()->getVectorNumElements();
2590
2591      SmallVector<uint32_t, 16> Idxs(NumElts);
2592      for (unsigned l = 0; l != NumElts; l += 8) {
2593        for (unsigned i = 0; i != 4; ++i)
2594          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2595        for (unsigned i = 4; i != 8; ++i)
2596          Idxs[i + l] = i + l;
2597      }
2598
2599      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2600
2601      if (CI->getNumArgOperands() == 4)
2602        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2603                            CI->getArgOperand(2));
2604    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2605                         Name.startswith("avx512.mask.pshufh.w."))) {
2606      Value *Op0 = CI->getArgOperand(0);
2607      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2608      unsigned NumElts = CI->getType()->getVectorNumElements();
2609
2610      SmallVector<uint32_t, 16> Idxs(NumElts);
2611      for (unsigned l = 0; l != NumElts; l += 8) {
2612        for (unsigned i = 0; i != 4; ++i)
2613          Idxs[i + l] = i + l;
2614        for (unsigned i = 0; i != 4; ++i)
2615          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2616      }
2617
2618      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2619
2620      if (CI->getNumArgOperands() == 4)
2621        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2622                            CI->getArgOperand(2));
2623    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2624      Value *Op0 = CI->getArgOperand(0);
2625      Value *Op1 = CI->getArgOperand(1);
2626      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2627      unsigned NumElts = CI->getType()->getVectorNumElements();
2628
2629      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2630      unsigned HalfLaneElts = NumLaneElts / 2;
2631
2632      SmallVector<uint32_t, 16> Idxs(NumElts);
2633      for (unsigned i = 0; i != NumElts; ++i) {
2634        // Base index is the starting element of the lane.
2635        Idxs[i] = i - (i % NumLaneElts);
        // If we are halfway through the lane, switch to the other source.
2637        if ((i % NumLaneElts) >= HalfLaneElts)
2638          Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from the
        // immediate, wrapping around the immediate every 8 bits.
2641        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2642      }
2643
2644      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2645
2646      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2647                          CI->getArgOperand(3));
2648    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2649                         Name.startswith("avx512.mask.movshdup") ||
2650                         Name.startswith("avx512.mask.movsldup"))) {
2651      Value *Op0 = CI->getArgOperand(0);
2652      unsigned NumElts = CI->getType()->getVectorNumElements();
2653      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2654
2655      unsigned Offset = 0;
2656      if (Name.startswith("avx512.mask.movshdup."))
2657        Offset = 1;
2658
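      // movsldup repeats each lane's even elements, movshdup (Offset == 1) its
      // odd elements, and movddup the low 64-bit element of each 128-bit lane.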
2659      SmallVector<uint32_t, 16> Idxs(NumElts);
2660      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2661        for (unsigned i = 0; i != NumLaneElts; i += 2) {
2662          Idxs[i + l + 0] = i + l + Offset;
2663          Idxs[i + l + 1] = i + l + Offset;
2664        }
2665
2666      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2667
2668      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2669                          CI->getArgOperand(1));
2670    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2671                         Name.startswith("avx512.mask.unpckl."))) {
2672      Value *Op0 = CI->getArgOperand(0);
2673      Value *Op1 = CI->getArgOperand(1);
2674      int NumElts = CI->getType()->getVectorNumElements();
2675      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2676
2677      SmallVector<uint32_t, 64> Idxs(NumElts);
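      // E.g. for v4i32 the mask is <0, 4, 1, 5>: interleave the low halves of
      // the two sources within each lane.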
2678      for (int l = 0; l != NumElts; l += NumLaneElts)
2679        for (int i = 0; i != NumLaneElts; ++i)
2680          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2681
2682      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2683
2684      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2685                          CI->getArgOperand(2));
2686    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2687                         Name.startswith("avx512.mask.unpckh."))) {
2688      Value *Op0 = CI->getArgOperand(0);
2689      Value *Op1 = CI->getArgOperand(1);
2690      int NumElts = CI->getType()->getVectorNumElements();
2691      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2692
2693      SmallVector<uint32_t, 64> Idxs(NumElts);
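      // E.g. for v4i32 the mask is <2, 6, 3, 7>: interleave the high halves of
      // the two sources within each lane.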
2694      for (int l = 0; l != NumElts; l += NumLaneElts)
2695        for (int i = 0; i != NumLaneElts; ++i)
2696          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2697
2698      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2699
2700      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2701                          CI->getArgOperand(2));
2702    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2703                         Name.startswith("avx512.mask.pand."))) {
2704      VectorType *FTy = cast<VectorType>(CI->getType());
2705      VectorType *ITy = VectorType::getInteger(FTy);
2706      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2707                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2708      Rep = Builder.CreateBitCast(Rep, FTy);
2709      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2710                          CI->getArgOperand(2));
2711    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2712                         Name.startswith("avx512.mask.pandn."))) {
2713      VectorType *FTy = cast<VectorType>(CI->getType());
2714      VectorType *ITy = VectorType::getInteger(FTy);
2715      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2716      Rep = Builder.CreateAnd(Rep,
2717                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2718      Rep = Builder.CreateBitCast(Rep, FTy);
2719      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2720                          CI->getArgOperand(2));
2721    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2722                         Name.startswith("avx512.mask.por."))) {
2723      VectorType *FTy = cast<VectorType>(CI->getType());
2724      VectorType *ITy = VectorType::getInteger(FTy);
2725      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2726                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2727      Rep = Builder.CreateBitCast(Rep, FTy);
2728      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2729                          CI->getArgOperand(2));
2730    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2731                         Name.startswith("avx512.mask.pxor."))) {
2732      VectorType *FTy = cast<VectorType>(CI->getType());
2733      VectorType *ITy = VectorType::getInteger(FTy);
2734      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2735                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2736      Rep = Builder.CreateBitCast(Rep, FTy);
2737      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2738                          CI->getArgOperand(2));
2739    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2740      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2741      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2742                          CI->getArgOperand(2));
2743    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2744      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2745      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2746                          CI->getArgOperand(2));
2747    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2748      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2749      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2750                          CI->getArgOperand(2));
2751    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
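      // Only the 512-bit forms carry a rounding-mode operand, which plain IR
      // fadd cannot express, so those map onto the new rounding intrinsics.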
2752      if (Name.endswith(".512")) {
2753        Intrinsic::ID IID;
2754        if (Name[17] == 's')
2755          IID = Intrinsic::x86_avx512_add_ps_512;
2756        else
2757          IID = Intrinsic::x86_avx512_add_pd_512;
2758
2759        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2760                                 { CI->getArgOperand(0), CI->getArgOperand(1),
2761                                   CI->getArgOperand(4) });
2762      } else {
2763        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2764      }
2765      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2766                          CI->getArgOperand(2));
2767    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2768      if (Name.endswith(".512")) {
2769        Intrinsic::ID IID;
2770        if (Name[17] == 's')
2771          IID = Intrinsic::x86_avx512_div_ps_512;
2772        else
2773          IID = Intrinsic::x86_avx512_div_pd_512;
2774
2775        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2776                                 { CI->getArgOperand(0), CI->getArgOperand(1),
2777                                   CI->getArgOperand(4) });
2778      } else {
2779        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2780      }
2781      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2782                          CI->getArgOperand(2));
2783    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2784      if (Name.endswith(".512")) {
2785        Intrinsic::ID IID;
2786        if (Name[17] == 's')
2787          IID = Intrinsic::x86_avx512_mul_ps_512;
2788        else
2789          IID = Intrinsic::x86_avx512_mul_pd_512;
2790
2791        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2792                                 { CI->getArgOperand(0), CI->getArgOperand(1),
2793                                   CI->getArgOperand(4) });
2794      } else {
2795        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2796      }
2797      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2798                          CI->getArgOperand(2));
2799    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2800      if (Name.endswith(".512")) {
2801        Intrinsic::ID IID;
2802        if (Name[17] == 's')
2803          IID = Intrinsic::x86_avx512_sub_ps_512;
2804        else
2805          IID = Intrinsic::x86_avx512_sub_pd_512;
2806
2807        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2808                                 { CI->getArgOperand(0), CI->getArgOperand(1),
2809                                   CI->getArgOperand(4) });
2810      } else {
2811        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2812      }
2813      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2814                          CI->getArgOperand(2));
2815    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2816                         Name.startswith("avx512.mask.min.p")) &&
2817               Name.drop_front(18) == ".512") {
2818      bool IsDouble = Name[17] == 'd';
2819      bool IsMin = Name[13] == 'i';
2820      static const Intrinsic::ID MinMaxTbl[2][2] = {
2821        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2822        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2823      };
2824      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2825
2826      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2827                               { CI->getArgOperand(0), CI->getArgOperand(1),
2828                                 CI->getArgOperand(4) });
2829      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2830                          CI->getArgOperand(2));
2831    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2832      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2833                                                         Intrinsic::ctlz,
2834                                                         CI->getType()),
2835                               { CI->getArgOperand(0), Builder.getInt1(false) });
2836      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2837                          CI->getArgOperand(1));
2838    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2839      bool IsImmediate = Name[16] == 'i' ||
2840                         (Name.size() > 18 && Name[18] == 'i');
2841      bool IsVariable = Name[16] == 'v';
2842      char Size = Name[16] == '.' ? Name[17] :
2843                  Name[17] == '.' ? Name[18] :
2844                  Name[18] == '.' ? Name[19] :
2845                                    Name[20];
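      // The element-size letter sits at a different offset depending on
      // whether the name is spelled psll, pslli, psllv, or psllv<N>.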
2846
2847      Intrinsic::ID IID;
2848      if (IsVariable && Name[17] != '.') {
2849        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2850          IID = Intrinsic::x86_avx2_psllv_q;
2851        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2852          IID = Intrinsic::x86_avx2_psllv_q_256;
2853        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2854          IID = Intrinsic::x86_avx2_psllv_d;
2855        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2856          IID = Intrinsic::x86_avx2_psllv_d_256;
2857        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2858          IID = Intrinsic::x86_avx512_psllv_w_128;
2859        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2860          IID = Intrinsic::x86_avx512_psllv_w_256;
2861        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2862          IID = Intrinsic::x86_avx512_psllv_w_512;
2863        else
2864          llvm_unreachable("Unexpected size");
2865      } else if (Name.endswith(".128")) {
2866        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2867          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2868                            : Intrinsic::x86_sse2_psll_d;
2869        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2870          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2871                            : Intrinsic::x86_sse2_psll_q;
2872        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2873          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2874                            : Intrinsic::x86_sse2_psll_w;
2875        else
2876          llvm_unreachable("Unexpected size");
2877      } else if (Name.endswith(".256")) {
2878        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2879          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2880                            : Intrinsic::x86_avx2_psll_d;
2881        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2882          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2883                            : Intrinsic::x86_avx2_psll_q;
2884        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2885          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2886                            : Intrinsic::x86_avx2_psll_w;
2887        else
2888          llvm_unreachable("Unexpected size");
2889      } else {
2890        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2891          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2892                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
2893                              Intrinsic::x86_avx512_psll_d_512;
2894        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2895          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2896                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
2897                              Intrinsic::x86_avx512_psll_q_512;
2898        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2899          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2900                            : Intrinsic::x86_avx512_psll_w_512;
2901        else
2902          llvm_unreachable("Unexpected size");
2903      }
2904
2905      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2906    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2907      bool IsImmediate = Name[16] == 'i' ||
2908                         (Name.size() > 18 && Name[18] == 'i');
2909      bool IsVariable = Name[16] == 'v';
2910      char Size = Name[16] == '.' ? Name[17] :
2911                  Name[17] == '.' ? Name[18] :
2912                  Name[18] == '.' ? Name[19] :
2913                                    Name[20];
2914
2915      Intrinsic::ID IID;
2916      if (IsVariable && Name[17] != '.') {
2917        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2918          IID = Intrinsic::x86_avx2_psrlv_q;
2919        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2920          IID = Intrinsic::x86_avx2_psrlv_q_256;
2921        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2922          IID = Intrinsic::x86_avx2_psrlv_d;
2923        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2924          IID = Intrinsic::x86_avx2_psrlv_d_256;
2925        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2926          IID = Intrinsic::x86_avx512_psrlv_w_128;
2927        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2928          IID = Intrinsic::x86_avx512_psrlv_w_256;
2929        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2930          IID = Intrinsic::x86_avx512_psrlv_w_512;
2931        else
2932          llvm_unreachable("Unexpected size");
2933      } else if (Name.endswith(".128")) {
2934        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2935          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2936                            : Intrinsic::x86_sse2_psrl_d;
2937        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2938          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2939                            : Intrinsic::x86_sse2_psrl_q;
2940        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2941          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2942                            : Intrinsic::x86_sse2_psrl_w;
2943        else
2944          llvm_unreachable("Unexpected size");
2945      } else if (Name.endswith(".256")) {
2946        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2947          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2948                            : Intrinsic::x86_avx2_psrl_d;
2949        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2950          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2951                            : Intrinsic::x86_avx2_psrl_q;
2952        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2953          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2954                            : Intrinsic::x86_avx2_psrl_w;
2955        else
2956          llvm_unreachable("Unexpected size");
2957      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
2959          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2960                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
2961                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
2963          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2964                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
2965                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
2967          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2968                            : Intrinsic::x86_avx512_psrl_w_512;
2969        else
2970          llvm_unreachable("Unexpected size");
2971      }
2972
2973      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2974    } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2975      bool IsImmediate = Name[16] == 'i' ||
2976                         (Name.size() > 18 && Name[18] == 'i');
2977      bool IsVariable = Name[16] == 'v';
2978      char Size = Name[16] == '.' ? Name[17] :
2979                  Name[17] == '.' ? Name[18] :
2980                  Name[18] == '.' ? Name[19] :
2981                                    Name[20];
2982
2983      Intrinsic::ID IID;
2984      if (IsVariable && Name[17] != '.') {
2985        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2986          IID = Intrinsic::x86_avx2_psrav_d;
2987        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2988          IID = Intrinsic::x86_avx2_psrav_d_256;
2989        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2990          IID = Intrinsic::x86_avx512_psrav_w_128;
2991        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2992          IID = Intrinsic::x86_avx512_psrav_w_256;
2993        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2994          IID = Intrinsic::x86_avx512_psrav_w_512;
2995        else
2996          llvm_unreachable("Unexpected size");
2997      } else if (Name.endswith(".128")) {
2998        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2999          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3000                            : Intrinsic::x86_sse2_psra_d;
3001        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3002          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3003                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3004                              Intrinsic::x86_avx512_psra_q_128;
3005        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3006          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3007                            : Intrinsic::x86_sse2_psra_w;
3008        else
3009          llvm_unreachable("Unexpected size");
3010      } else if (Name.endswith(".256")) {
3011        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3012          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3013                            : Intrinsic::x86_avx2_psra_d;
3014        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3015          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3016                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3017                              Intrinsic::x86_avx512_psra_q_256;
3018        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3019          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3020                            : Intrinsic::x86_avx2_psra_w;
3021        else
3022          llvm_unreachable("Unexpected size");
3023      } else {
3024        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3025          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3026                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3027                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
3029          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3030                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3031                              Intrinsic::x86_avx512_psra_q_512;
3032        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3033          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3034                            : Intrinsic::x86_avx512_psra_w_512;
3035        else
3036          llvm_unreachable("Unexpected size");
3037      }
3038
3039      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3040    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3041      Rep = upgradeMaskedMove(Builder, *CI);
3042    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3043      Rep = UpgradeMaskToInt(Builder, *CI);
3044    } else if (IsX86 && Name.endswith(".movntdqa")) {
3045      Module *M = F->getParent();
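      // Build the metadata node used to tag the load below as nontemporal.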
3046      MDNode *Node = MDNode::get(
3047          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3048
3049      Value *Ptr = CI->getArgOperand(0);
3050      VectorType *VTy = cast<VectorType>(CI->getType());
3051
3052      // Convert the type of the pointer to a pointer to the stored type.
3053      Value *BC =
3054          Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
3055      LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
3056      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3057      Rep = LI;
3058    } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3059                         Name.startswith("fma.vfmsub.") ||
3060                         Name.startswith("fma.vfnmadd.") ||
3061                         Name.startswith("fma.vfnmsub."))) {
3062      bool NegMul = Name[6] == 'n';
3063      bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3064      bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
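      // E.g. "fma.vfnmsub.sd" sets NegMul, NegAcc, and IsScalar.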
3065
3066      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3067                       CI->getArgOperand(2) };
3068
3069      if (IsScalar) {
3070        Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3071        Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3072        Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3073      }
3074
3075      if (NegMul && !IsScalar)
3076        Ops[0] = Builder.CreateFNeg(Ops[0]);
3077      if (NegMul && IsScalar)
3078        Ops[1] = Builder.CreateFNeg(Ops[1]);
3079      if (NegAcc)
3080        Ops[2] = Builder.CreateFNeg(Ops[2]);
3081
3082      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3083                                                         Intrinsic::fma,
3084                                                         Ops[0]->getType()),
3085                               Ops);
3086
3087      if (IsScalar)
3088        Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3089                                          (uint64_t)0);
3090    } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3091      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3092                       CI->getArgOperand(2) };
3093
3094      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3095      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3096      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3097
3098      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3099                                                         Intrinsic::fma,
3100                                                         Ops[0]->getType()),
3101                               Ops);
3102
3103      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3104                                        Rep, (uint64_t)0);
3105    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3106                         Name.startswith("avx512.maskz.vfmadd.s") ||
3107                         Name.startswith("avx512.mask3.vfmadd.s") ||
3108                         Name.startswith("avx512.mask3.vfmsub.s") ||
3109                         Name.startswith("avx512.mask3.vfnmsub.s"))) {
3110      bool IsMask3 = Name[11] == '3';
3111      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." prefix to make the indexing below simpler.
3113      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3114      bool NegMul = Name[2] == 'n';
3115      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3116
3117      Value *A = CI->getArgOperand(0);
3118      Value *B = CI->getArgOperand(1);
3119      Value *C = CI->getArgOperand(2);
3120
3121      if (NegMul && (IsMask3 || IsMaskZ))
3122        A = Builder.CreateFNeg(A);
3123      if (NegMul && !(IsMask3 || IsMaskZ))
3124        B = Builder.CreateFNeg(B);
3125      if (NegAcc)
3126        C = Builder.CreateFNeg(C);
3127
3128      A = Builder.CreateExtractElement(A, (uint64_t)0);
3129      B = Builder.CreateExtractElement(B, (uint64_t)0);
3130      C = Builder.CreateExtractElement(C, (uint64_t)0);
3131
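      // Operand 4 is the rounding mode; 4 (CUR_DIRECTION) means no explicit
      // rounding, in which case a plain llvm.fma call suffices.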
3132      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3133          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3134        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3135
3136        Intrinsic::ID IID;
3137        if (Name.back() == 'd')
3138          IID = Intrinsic::x86_avx512_vfmadd_f64;
3139        else
3140          IID = Intrinsic::x86_avx512_vfmadd_f32;
3141        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3142        Rep = Builder.CreateCall(FMA, Ops);
3143      } else {
3144        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3145                                                  Intrinsic::fma,
3146                                                  A->getType());
3147        Rep = Builder.CreateCall(FMA, { A, B, C });
3148      }
3149
3150      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3151                        IsMask3 ? C : A;
3152
3153      // For Mask3 with NegAcc, we need to create a new extractelement that
3154      // avoids the negation above.
3155      if (NegAcc && IsMask3)
3156        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3157                                                (uint64_t)0);
3158
3159      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3160                                Rep, PassThru);
3161      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3162                                        Rep, (uint64_t)0);
3163    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3164                         Name.startswith("avx512.mask.vfnmadd.p") ||
3165                         Name.startswith("avx512.mask.vfnmsub.p") ||
3166                         Name.startswith("avx512.mask3.vfmadd.p") ||
3167                         Name.startswith("avx512.mask3.vfmsub.p") ||
3168                         Name.startswith("avx512.mask3.vfnmsub.p") ||
3169                         Name.startswith("avx512.maskz.vfmadd.p"))) {
3170      bool IsMask3 = Name[11] == '3';
3171      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." prefix to make the indexing below simpler.
3173      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3174      bool NegMul = Name[2] == 'n';
3175      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3176
3177      Value *A = CI->getArgOperand(0);
3178      Value *B = CI->getArgOperand(1);
3179      Value *C = CI->getArgOperand(2);
3180
3181      if (NegMul && (IsMask3 || IsMaskZ))
3182        A = Builder.CreateFNeg(A);
3183      if (NegMul && !(IsMask3 || IsMaskZ))
3184        B = Builder.CreateFNeg(B);
3185      if (NegAcc)
3186        C = Builder.CreateFNeg(C);
3187
3188      if (CI->getNumArgOperands() == 5 &&
3189          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3190           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3191        Intrinsic::ID IID;
        // Check the character before ".512" in the name.
3193        if (Name[Name.size()-5] == 's')
3194          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3195        else
3196          IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3197
3198        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3199                                 { A, B, C, CI->getArgOperand(4) });
3200      } else {
3201        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3202                                                  Intrinsic::fma,
3203                                                  A->getType());
3204        Rep = Builder.CreateCall(FMA, { A, B, C });
3205      }
3206
3207      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3208                        IsMask3 ? CI->getArgOperand(2) :
3209                                  CI->getArgOperand(0);
3210
3211      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3212    } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3213                         Name.startswith("fma.vfmsubadd.p"))) {
3214      bool IsSubAdd = Name[7] == 's';
3215      int NumElts = CI->getType()->getVectorNumElements();
3216
3217      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3218                       CI->getArgOperand(2) };
3219
3220      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3221                                                Ops[0]->getType());
3222      Value *Odd = Builder.CreateCall(FMA, Ops);
3223      Ops[2] = Builder.CreateFNeg(Ops[2]);
3224      Value *Even = Builder.CreateCall(FMA, Ops);
3225
3226      if (IsSubAdd)
3227        std::swap(Even, Odd);
3228
3229      SmallVector<uint32_t, 32> Idxs(NumElts);
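      // Interleave: even result elements come from Even (a*b - c) and odd
      // elements from Odd (a*b + c); e.g. <0, 5, 2, 7> for four elements.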
3230      for (int i = 0; i != NumElts; ++i)
3231        Idxs[i] = i + (i % 2) * NumElts;
3232
3233      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3234    } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3235                         Name.startswith("avx512.mask3.vfmaddsub.p") ||
3236                         Name.startswith("avx512.maskz.vfmaddsub.p") ||
3237                         Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3238      bool IsMask3 = Name[11] == '3';
3239      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." prefix to make the indexing below simpler.
3241      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3242      bool IsSubAdd = Name[3] == 's';
3243      if (CI->getNumArgOperands() == 5 &&
3244          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3245           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3246        Intrinsic::ID IID;
        // Check the character before ".512" in the name.
3248        if (Name[Name.size()-5] == 's')
3249          IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3250        else
3251          IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3252
3253        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3254                         CI->getArgOperand(2), CI->getArgOperand(4) };
3255        if (IsSubAdd)
3256          Ops[2] = Builder.CreateFNeg(Ops[2]);
3257
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 Ops);
3261      } else {
3262        int NumElts = CI->getType()->getVectorNumElements();
3263
3264        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3265                         CI->getArgOperand(2) };
3266
3267        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3268                                                  Ops[0]->getType());
3269        Value *Odd = Builder.CreateCall(FMA, Ops);
3270        Ops[2] = Builder.CreateFNeg(Ops[2]);
3271        Value *Even = Builder.CreateCall(FMA, Ops);
3272
3273        if (IsSubAdd)
3274          std::swap(Even, Odd);
3275
3276        SmallVector<uint32_t, 32> Idxs(NumElts);
3277        for (int i = 0; i != NumElts; ++i)
3278          Idxs[i] = i + (i % 2) * NumElts;
3279
3280        Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3281      }
3282
3283      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3284                        IsMask3 ? CI->getArgOperand(2) :
3285                                  CI->getArgOperand(0);
3286
3287      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3288    } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3289                         Name.startswith("avx512.maskz.pternlog."))) {
3290      bool ZeroMask = Name[11] == 'z';
3291      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3292      unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3293      Intrinsic::ID IID;
3294      if (VecWidth == 128 && EltWidth == 32)
3295        IID = Intrinsic::x86_avx512_pternlog_d_128;
3296      else if (VecWidth == 256 && EltWidth == 32)
3297        IID = Intrinsic::x86_avx512_pternlog_d_256;
3298      else if (VecWidth == 512 && EltWidth == 32)
3299        IID = Intrinsic::x86_avx512_pternlog_d_512;
3300      else if (VecWidth == 128 && EltWidth == 64)
3301        IID = Intrinsic::x86_avx512_pternlog_q_128;
3302      else if (VecWidth == 256 && EltWidth == 64)
3303        IID = Intrinsic::x86_avx512_pternlog_q_256;
3304      else if (VecWidth == 512 && EltWidth == 64)
3305        IID = Intrinsic::x86_avx512_pternlog_q_512;
3306      else
3307        llvm_unreachable("Unexpected intrinsic");
3308
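      // Operand 3 is the 8-bit truth-table immediate and is passed through to
      // the new intrinsic unchanged.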
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3310                        CI->getArgOperand(2), CI->getArgOperand(3) };
3311      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3312                               Args);
3313      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3314                                 : CI->getArgOperand(0);
3315      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3316    } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3317                         Name.startswith("avx512.maskz.vpmadd52"))) {
3318      bool ZeroMask = Name[11] == 'z';
3319      bool High = Name[20] == 'h' || Name[21] == 'h';
3320      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3321      Intrinsic::ID IID;
3322      if (VecWidth == 128 && !High)
3323        IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3324      else if (VecWidth == 256 && !High)
3325        IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3326      else if (VecWidth == 512 && !High)
3327        IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3328      else if (VecWidth == 128 && High)
3329        IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3330      else if (VecWidth == 256 && High)
3331        IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3332      else if (VecWidth == 512 && High)
3333        IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3334      else
3335        llvm_unreachable("Unexpected intrinsic");
3336
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3338                        CI->getArgOperand(2) };
3339      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3340                               Args);
3341      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3342                                 : CI->getArgOperand(0);
3343      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3344    } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3345                         Name.startswith("avx512.mask.vpermt2var.") ||
3346                         Name.startswith("avx512.maskz.vpermt2var."))) {
3347      bool ZeroMask = Name[11] == 'z';
3348      bool IndexForm = Name[17] == 'i';
3349      Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3350    } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3351                         Name.startswith("avx512.maskz.vpdpbusd.") ||
3352                         Name.startswith("avx512.mask.vpdpbusds.") ||
3353                         Name.startswith("avx512.maskz.vpdpbusds."))) {
3354      bool ZeroMask = Name[11] == 'z';
3355      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3356      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3357      Intrinsic::ID IID;
3358      if (VecWidth == 128 && !IsSaturating)
3359        IID = Intrinsic::x86_avx512_vpdpbusd_128;
3360      else if (VecWidth == 256 && !IsSaturating)
3361        IID = Intrinsic::x86_avx512_vpdpbusd_256;
3362      else if (VecWidth == 512 && !IsSaturating)
3363        IID = Intrinsic::x86_avx512_vpdpbusd_512;
3364      else if (VecWidth == 128 && IsSaturating)
3365        IID = Intrinsic::x86_avx512_vpdpbusds_128;
3366      else if (VecWidth == 256 && IsSaturating)
3367        IID = Intrinsic::x86_avx512_vpdpbusds_256;
3368      else if (VecWidth == 512 && IsSaturating)
3369        IID = Intrinsic::x86_avx512_vpdpbusds_512;
3370      else
3371        llvm_unreachable("Unexpected intrinsic");
3372
3373      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
3375      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3376                               Args);
3377      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3378                                 : CI->getArgOperand(0);
3379      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3380    } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3381                         Name.startswith("avx512.maskz.vpdpwssd.") ||
3382                         Name.startswith("avx512.mask.vpdpwssds.") ||
3383                         Name.startswith("avx512.maskz.vpdpwssds."))) {
3384      bool ZeroMask = Name[11] == 'z';
3385      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3386      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3387      Intrinsic::ID IID;
3388      if (VecWidth == 128 && !IsSaturating)
3389        IID = Intrinsic::x86_avx512_vpdpwssd_128;
3390      else if (VecWidth == 256 && !IsSaturating)
3391        IID = Intrinsic::x86_avx512_vpdpwssd_256;
3392      else if (VecWidth == 512 && !IsSaturating)
3393        IID = Intrinsic::x86_avx512_vpdpwssd_512;
3394      else if (VecWidth == 128 && IsSaturating)
3395        IID = Intrinsic::x86_avx512_vpdpwssds_128;
3396      else if (VecWidth == 256 && IsSaturating)
3397        IID = Intrinsic::x86_avx512_vpdpwssds_256;
3398      else if (VecWidth == 512 && IsSaturating)
3399        IID = Intrinsic::x86_avx512_vpdpwssds_512;
3400      else
3401        llvm_unreachable("Unexpected intrinsic");
3402
3403      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
3405      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3406                               Args);
3407      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3408                                 : CI->getArgOperand(0);
3409      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3410    } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3411                         Name == "addcarry.u32" || Name == "addcarry.u64" ||
3412                         Name == "subborrow.u32" || Name == "subborrow.u64")) {
      Intrinsic::ID IID;
      if (Name[0] == 'a' && Name.back() == '2')
        IID = Intrinsic::x86_addcarry_32;
      else if (Name[0] == 'a' && Name.back() == '4')
        IID = Intrinsic::x86_addcarry_64;
      else if (Name[0] == 's' && Name.back() == '2')
        IID = Intrinsic::x86_subborrow_32;
      else if (Name[0] == 's' && Name.back() == '4')
        IID = Intrinsic::x86_subborrow_64;
      else
        llvm_unreachable("Unexpected intrinsic");

      // Make a call with 3 operands.
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Value *NewCall = Builder.CreateCall(
                                Intrinsic::getDeclaration(CI->getModule(), IID),
                                Args);

      // Extract the second result and store it.
      Value *Data = Builder.CreateExtractValue(NewCall, 1);
      // Cast the pointer to the right type.
      Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
                                 llvm::PointerType::getUnqual(Data->getType()));
      Builder.CreateAlignedStore(Data, Ptr, 1);
      // Replace the original call result with the first result of the new call.
      Value *CF = Builder.CreateExtractValue(NewCall, 0);

      CI->replaceAllUsesWith(CF);
      Rep = nullptr;
    } else if (IsX86 && Name.startswith("avx512.mask.") &&
               upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
      // Rep will be updated by the call in the condition.
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
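      // Lower the NVVM integer abs intrinsics to a compare-and-select,
      // i.e. abs(x) == (x >= 0) ? x : -x.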
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
                          Name.startswith("atomic.load.add.f64.p"))) {
      Value *Ptr = CI->getArgOperand(0);
      Value *Val = CI->getArgOperand(1);
      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
                                    AtomicOrdering::SequentiallyConsistent);
    } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                          Name == "max.ui" || Name == "max.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                          Name == "min.ui" || Name == "min.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM && Name == "h2f") {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  const auto &DefaultCase = [&NewFn, &CI]() -> void {
    // Handle generic mangling change, but nothing else
    assert(
        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
        "Unknown function for CallInst upgrade and isn't just a name change");
    CI->setCalledFunction(NewFn);
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::experimental_vector_reduce_v2_fmul: {
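    // The v2 variant always honors the scalar start value, while the old
    // intrinsic ignored it for fast-math calls. When the call is fast,
    // substitute the multiplicative identity (1.0) so the result is
    // unchanged; otherwise forward the original start value.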
    SmallVector<Value *, 2> Args;
    if (CI->isFast())
      Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
    else
      Args.push_back(CI->getOperand(0));
    Args.push_back(CI->getOperand(1));
    NewCall = Builder.CreateCall(NewFn, Args);
    cast<Instruction>(NewCall)->copyFastMathFlags(CI);
    break;
  }
  case Intrinsic::experimental_vector_reduce_v2_fadd: {
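    // Same as the fmul case above, but the identity for fadd is 0.0.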
    SmallVector<Value *, 2> Args;
    if (CI->isFast())
      Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
    else
      Args.push_back(CI->getOperand(0));
    Args.push_back(CI->getOperand(1));
    NewCall = Builder.CreateCall(NewFn, Args);
    cast<Instruction>(NewCall)->copyFastMathFlags(CI);
    break;
  }
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
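    // The operand list is unchanged for these; only the declaration's type
    // mangling differs, so rebuild the call against the new declaration with
    // the original arguments.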
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
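    // llvm.objectsize gained two trailing i1 flags over time (nullunknown
    // and dynamic). Synthesize 'false' for whichever flags the old call is
    // missing and forward the rest.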
    Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
                                   ? Builder.getFalse()
                                   : CI->getArgOperand(2);
    Value *Dynamic =
        CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize,
                Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value:
    // Upgrade from the old version that had an extra offset argument.
    assert(CI->getNumArgOperands() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take one argument. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // Cast the pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
                                 llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, 1);
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    std::string Name = CI->getName();
    if (!Name.empty()) {
      CI->setName(Name + ".old");
      NewCall->setName(Name);
    }
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument
    // models an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->getNumArgOperands() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getZExtValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getZExtValue());
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  std::string Name = CI->getName();
  if (!Name.empty()) {
    CI->setName(Name + ".old");
    NewCall->setName(Name);
  }
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}

MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
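  // In the old scalar format, operand 0 is the type-name string; in the
  // struct-path format it is an MDNode, e.g. (illustrative metadata only):
  //   old: !{!"int", !1}  or  !{!"int", !1, i1 true}
  //   new: !{!2, !2, i64 0}  with  !2 = !{!"int", !1}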
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create an MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }
  // Create an MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about the target data layout, so we assume that
    // the maximum pointer size is 64 bits.
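    // e.g. (an illustrative sketch) a cast between address spaces
    //   %q = bitcast i8 addrspace(1)* %p to i8 addrspace(2)*
    // becomes
    //   %t = ptrtoint i8 addrspace(1)* %p to i64
    //   %q = inttoptr i64 %t to i8 addrspace(2)*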
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about the target data layout, so we assume that
    // the maximum pointer size is 64 bits.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number. If it is outdated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}

/// This checks for the objc retain/release marker, which should be upgraded.
/// It returns true if the module is modified.
static bool UpgradeRetainReleaseMarker(Module &M) {
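  // The marker used to live in a named metadata node, with '#' separating
  // the inline-asm string from its comment, e.g. (an illustrative value)
  //   "mov fp, fp # marker for objc runtime".
  // Rewrite it as a module flag, replacing the '#' separator with ';'.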
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}

void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts calls to ARC runtime functions into intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);

    for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
      CallInst *CI = dyn_cast<CallInst>(*I++);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast the argument to the parameter type of the new function if
        // it's not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the
          // argument to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->setName(CI->getName());

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain/release marker. If there is no need to upgrade the
  // marker, that means either the module is already new enough to contain the
  // new intrinsics or it is not ARC, so there is no need to upgrade the
  // runtime calls either.
  if (!UpgradeRetainReleaseMarker(M))
    return;

  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}

bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE module flags. The module flag behavior for these two
    // flags used to be Error and is now Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade the Objective-C Image Info Section. Remove the whitespace in
    // the section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
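    // e.g. "__DATA, __objc_imageinfo, regular, no_dead_strip" becomes
    //      "__DATA,__objc_imageinfo,regular,no_dead_strip".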
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }

  // "Objective-C Class Properties" is a recent addition for Objective-C. We
  // upgrade ObjC bitcode to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  return Changed;
}

void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    return OS.str().substr(1);
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.startswith("__DATA, __objc_catlist"))
      continue;

    // Rewrite, e.g.,
    //   __DATA, __objc_catlist, regular, no_dead_strip
    // as
    //   __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}

static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}

static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
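  // e.g. "llvm.vectorizer.width" becomes "llvm.loop.vectorize.width"; the
  // one exception is "llvm.vectorizer.unroll", which maps to the interleave
  // count below.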

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}

std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";

  // If the target is X86 and the datalayout matches the expected format, add
  // the pointer-size address spaces to the datalayout.
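  // e.g. "e-m:e-i64:64-f80:128-n8:16:32:64-S128" becomes
  // "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128".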
  if (!Triple(TT).isX86() || DL.contains(AddrSpaces))
    return DL;

  SmallVector<StringRef, 4> Groups;
  Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
  if (!R.match(DL, &Groups))
    return DL;

  SmallString<1024> Buf;
  std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
  return Res;
}

void llvm::UpgradeFramePointerAttributes(AttrBuilder &B) {
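  // Fold the two old attributes into the single "frame-pointer" attribute:
  //   "no-frame-pointer-elim"="true"    -> "frame-pointer"="all"
  //   "no-frame-pointer-elim"="false"   -> "frame-pointer"="none"
  //   "no-frame-pointer-elim-non-leaf"  -> "frame-pointer"="non-leaf"
  // with "all" taking priority over "non-leaf".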
  StringRef FramePointer;
  if (B.contains("no-frame-pointer-elim")) {
    // The value can be "true" or "false".
    for (const auto &I : B.td_attrs())
      if (I.first == "no-frame-pointer-elim")
        FramePointer = I.second == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }

  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);
}
