AutoUpgrade.cpp (322855 → 326496)
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old: replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
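// Illustrative example (not from the original file): an old declaration such
// as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to ".old" and calls are remapped to the current signature:
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)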

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
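// Illustrative example (not from the original file): an old declaration
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is renamed to ".old" and mapped to the current form taking an i8 immediate:
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)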

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name == "avx512.mask.add.pd.128" || // Added in 4.0
      Name == "avx512.mask.add.pd.256" || // Added in 4.0
      Name == "avx512.mask.add.ps.128" || // Added in 4.0
      Name == "avx512.mask.add.ps.256" || // Added in 4.0
      Name == "avx512.mask.div.pd.128" || // Added in 4.0
      Name == "avx512.mask.div.pd.256" || // Added in 4.0
      Name == "avx512.mask.div.ps.128" || // Added in 4.0
      Name == "avx512.mask.div.ps.256" || // Added in 4.0
      Name == "avx512.mask.mul.pd.128" || // Added in 4.0
      Name == "avx512.mask.mul.pd.256" || // Added in 4.0
      Name == "avx512.mask.mul.ps.128" || // Added in 4.0
      Name == "avx512.mask.mul.ps.256" || // Added in 4.0
      Name == "avx512.mask.sub.pd.128" || // Added in 4.0
      Name == "avx512.mask.sub.pd.256" || // Added in 4.0
      Name == "avx512.mask.sub.ps.128" || // Added in 4.0
      Name == "avx512.mask.sub.ps.256" || // Added in 4.0
      Name == "avx512.mask.max.pd.128" || // Added in 5.0
      Name == "avx512.mask.max.pd.256" || // Added in 5.0
      Name == "avx512.mask.max.ps.128" || // Added in 5.0
      Name == "avx512.mask.max.ps.256" || // Added in 5.0
      Name == "avx512.mask.min.pd.128" || // Added in 5.0
      Name == "avx512.mask.min.pd.256" || // Added in 5.0
      Name == "avx512.mask.min.ps.128" || // Added in 5.0
      Name == "avx512.mask.min.ps.256" || // Added in 5.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      (Name.startswith("xop.vpcom") && // Added in 3.2
       F->arg_size() == 2))
    return true;

  return false;
}

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  return false;
}

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space.
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
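// Illustrative example (not from the original file): a one-byte shift such as
//   %r = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %v, i32 8)
// (the shift amount is in bits at this point) becomes a bitcast of %v to
// <16 x i8> followed by a shuffle that pulls one zero byte into the low lane:
//   shufflevector <16 x i8> zeroinitializer, <16 x i8> %v,
//                 <16 x i32> <i32 15, i32 16, ..., i32 30>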

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                     cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have fewer than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
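// Illustrative example (not from the original file): an i8 mask feeding a
// 4-element operation becomes
//   %bc = bitcast i8 %mask to <8 x i1>
//   %m  = shufflevector <8 x i1> %bc, <8 x i1> %bc,
//                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>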

static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just return the first operand.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting, while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}
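// Illustrative example (not from the original file): a 128-bit PALIGNR with
// an immediate of 3 becomes a byte shuffle taking the upper 13 bytes of the
// second source followed by the low 3 bytes of the first:
//   shufflevector <16 x i8> %op1, <16 x i8> %op0,
//                 <16 x i32> <i32 3, ..., i32 15, i32 16, i32 17, i32 18>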

static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
      Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
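// Illustrative example (not from the original file): with a non-constant i8
// mask, a <4 x float> avx512.mask.storeu becomes the generic masked store
// intrinsic (the exact mangled name depends on the overload):
//   call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %data,
//                                              <4 x float>* %ptr, i32 1,
//                                              <4 x i1> %mask)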

static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
      Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}

static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}
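// Illustrative example (not from the original file): this rewrites
//   %r = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a, <4 x i32> %b)
// into the icmp+select idiom the backend pattern-matches back to pmaxsd:
//   %c = icmp sgt <4 x i32> %a, %b
//   %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b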

static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
  const auto *C = dyn_cast<Constant>(Mask);
  if (!C || !C->isAllOnesValue())
    Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Cmp = Builder.CreateShuffleVector(Cmp,
                                      Constant::getNullValue(Cmp->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
                                                     std::max(NumElts, 8U)));
}
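// Illustrative example (not from the original file): for a 4-element
// avx512.mask.pcmpeq.d call (CC == 0) the compare becomes
//   %c = icmp eq <4 x i32> %a, %b
// The result is ANDed with the i8 input mask (unless it is all-ones), widened
// to 8 lanes with zeros, and bitcast back to an i8 result.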

// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *F = CI.getCalledFunction();
  Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}

static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}

static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
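// Illustrative example (not from the original file, and the intrinsic name is
// an assumption): an avx512.cvtmask2* call such as
//   %r = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %k)
// becomes a bitcast of %k to <8 x i1> followed by
//   %r = sext <8 x i1> %m to <8 x i32>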

/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);

    if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx.movnt.") ||
                  Name.startswith("avx512.storent."))) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      VectorType *VTy = cast<VectorType>(Arg1->getType());
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
                                                 VTy->getBitWidth() / 8);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && Name == "sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("sse.storeu.") ||
                  Name.startswith("sse2.storeu.") ||
                  Name.startswith("avx.storeu."))) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx512.mask.store"))) {
      // "avx512.mask.storeu." or "avx512.mask.store."
      bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), Aligned);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (IsX86 && (Name.startswith("sse2.pcmp") ||
                  Name.startswith("avx2.pcmp"))) {
      // "sse2.pcmpeq.", "sse2.pcmpgt.", "avx2.pcmpeq." or "avx2.pcmpgt."
      bool CmpEq = Name[9] == 'e';
      Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                               CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
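      // Illustrative result (not from the original file): for
      // @llvm.x86.sse2.pcmpeq.d this emits
      //   %c = icmp eq <4 x i32> %a, %b
      //   %r = sext <4 x i1> %c to <4 x i32>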
994 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
995 Type *I32Ty = Type::getInt32Ty(C);
996 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
997 ConstantInt::get(I32Ty, 0));
998 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
999 ConstantInt::get(I32Ty, 0));
1000 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1001 Builder.CreateFAdd(Elt0, Elt1),
1002 ConstantInt::get(I32Ty, 0));
1003 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
1004 Type *I32Ty = Type::getInt32Ty(C);
1005 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1006 ConstantInt::get(I32Ty, 0));
1007 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1008 ConstantInt::get(I32Ty, 0));
1009 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1010 Builder.CreateFSub(Elt0, Elt1),
1011 ConstantInt::get(I32Ty, 0));
1012 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
1013 Type *I32Ty = Type::getInt32Ty(C);
1014 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1015 ConstantInt::get(I32Ty, 0));
1016 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1017 ConstantInt::get(I32Ty, 0));
1018 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1019 Builder.CreateFMul(Elt0, Elt1),
1020 ConstantInt::get(I32Ty, 0));
1021 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1022 Type *I32Ty = Type::getInt32Ty(C);
1023 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1024 ConstantInt::get(I32Ty, 0));
1025 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1026 ConstantInt::get(I32Ty, 0));
1027 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1028 Builder.CreateFDiv(Elt0, Elt1),
1029 ConstantInt::get(I32Ty, 0));
1030 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1031 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1032 bool CmpEq = Name[16] == 'e';
1033 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1034 } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
1035 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1036 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1037 } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
1038 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1039 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1040 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1041 Name == "sse2.pmaxs.w" ||
1042 Name == "sse41.pmaxsd" ||
1043 Name.startswith("avx2.pmaxs") ||
1044 Name.startswith("avx512.mask.pmaxs"))) {
1045 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1046 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1047 Name == "sse41.pmaxuw" ||
1048 Name == "sse41.pmaxud" ||
1049 Name.startswith("avx2.pmaxu") ||
1050 Name.startswith("avx512.mask.pmaxu"))) {
1051 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1052 } else if (IsX86 && (Name == "sse41.pminsb" ||
1053 Name == "sse2.pmins.w" ||
1054 Name == "sse41.pminsd" ||
1055 Name.startswith("avx2.pmins") ||
1056 Name.startswith("avx512.mask.pmins"))) {
1057 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1058 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1059 Name == "sse41.pminuw" ||
1060 Name == "sse41.pminud" ||
1061 Name.startswith("avx2.pminu") ||
1062 Name.startswith("avx512.mask.pminu"))) {
1063 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1064 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1065 Name == "sse2.cvtps2pd" ||
1066 Name == "avx.cvtdq2.pd.256" ||
1067 Name == "avx.cvt.ps2.pd.256" ||
1068 Name.startswith("avx512.mask.cvtdq2pd.") ||
1069 Name.startswith("avx512.mask.cvtudq2pd."))) {
1070 // Lossless i32/float to double conversion.
1071 // Extract the bottom elements if necessary and convert to double vector.
1072 Value *Src = CI->getArgOperand(0);
1073 VectorType *SrcTy = cast<VectorType>(Src->getType());
1074 VectorType *DstTy = cast<VectorType>(CI->getType());
1075 Rep = CI->getArgOperand(0);
1076
1077 unsigned NumDstElts = DstTy->getNumElements();
1078 if (NumDstElts < SrcTy->getNumElements()) {
1079 assert(NumDstElts == 2 && "Unexpected vector size");
1080 uint32_t ShuffleMask[2] = { 0, 1 };
1081 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
1082 ShuffleMask);
1083 }
1084
1085 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
1086 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
1087 if (SInt2Double)
1088 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1089 else if (UInt2Double)
1090 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1091 else
1092 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1093
1094 if (CI->getNumArgOperands() == 3)
1095 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1096 CI->getArgOperand(1));
1097 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1098 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1099 CI->getArgOperand(1), CI->getArgOperand(2),
1100 /*Aligned*/false);
1101 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1102 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1103 CI->getArgOperand(1),CI->getArgOperand(2),
1104 /*Aligned*/true);
1105 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1106 Intrinsic::ID intID;
1107 if (Name.endswith("ub"))
1108 intID = Intrinsic::x86_xop_vpcomub;
1109 else if (Name.endswith("uw"))
1110 intID = Intrinsic::x86_xop_vpcomuw;
1111 else if (Name.endswith("ud"))
1112 intID = Intrinsic::x86_xop_vpcomud;
1113 else if (Name.endswith("uq"))
1114 intID = Intrinsic::x86_xop_vpcomuq;
1115 else if (Name.endswith("b"))
1116 intID = Intrinsic::x86_xop_vpcomb;
1117 else if (Name.endswith("w"))
1118 intID = Intrinsic::x86_xop_vpcomw;
1119 else if (Name.endswith("d"))
1120 intID = Intrinsic::x86_xop_vpcomd;
1121 else if (Name.endswith("q"))
1122 intID = Intrinsic::x86_xop_vpcomq;
1123 else
1124 llvm_unreachable("Unknown suffix");
1125
1126 Name = Name.substr(9); // strip off "xop.vpcom"
1127 unsigned Imm;
1128 if (Name.startswith("lt"))
1129 Imm = 0;
1130 else if (Name.startswith("le"))
1131 Imm = 1;
1132 else if (Name.startswith("gt"))
1133 Imm = 2;
1134 else if (Name.startswith("ge"))
1135 Imm = 3;
1136 else if (Name.startswith("eq"))
1137 Imm = 4;
1138 else if (Name.startswith("ne"))
1139 Imm = 5;
1140 else if (Name.startswith("false"))
1141 Imm = 6;
1142 else if (Name.startswith("true"))
1143 Imm = 7;
1144 else
1145 llvm_unreachable("Unknown condition");
1146
1147 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1148 Rep =
1149 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1150 Builder.getInt8(Imm)});
1151 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1152 Value *Sel = CI->getArgOperand(2);
1153 Value *NotSel = Builder.CreateNot(Sel);
1154 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1155 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1156 Rep = Builder.CreateOr(Sel0, Sel1);
1157 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1158 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1159 Intrinsic::x86_sse42_crc32_32_8);
1160 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1161 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1162 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1163 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1164 // Replace broadcasts with a series of insertelements.
1165 Type *VecTy = CI->getType();
1166 Type *EltTy = VecTy->getVectorElementType();
1167 unsigned EltNum = VecTy->getVectorNumElements();
1168 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1169 EltTy->getPointerTo());
1170 Value *Load = Builder.CreateLoad(EltTy, Cast);
1171 Type *I32Ty = Type::getInt32Ty(C);
1172 Rep = UndefValue::get(VecTy);
1173 for (unsigned I = 0; I < EltNum; ++I)
1174 Rep = Builder.CreateInsertElement(Rep, Load,
1175 ConstantInt::get(I32Ty, I));
1176 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1177 Name.startswith("sse41.pmovzx") ||
1178 Name.startswith("avx2.pmovsx") ||
1179 Name.startswith("avx2.pmovzx") ||
1180 Name.startswith("avx512.mask.pmovsx") ||
1181 Name.startswith("avx512.mask.pmovzx"))) {
1182 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1183 VectorType *DstTy = cast<VectorType>(CI->getType());
1184 unsigned NumDstElts = DstTy->getNumElements();
1185
1186 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1187 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1188 for (unsigned i = 0; i != NumDstElts; ++i)
1189 ShuffleMask[i] = i;
1190
1191 Value *SV = Builder.CreateShuffleVector(
1192 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1193
1194 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1195 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1196 : Builder.CreateZExt(SV, DstTy);
1197 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1198 if (CI->getNumArgOperands() == 3)
1199 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1200 CI->getArgOperand(1));
1201 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1202 Name == "avx2.vbroadcasti128")) {
1203 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1204 Type *EltTy = CI->getType()->getVectorElementType();
1205 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1206 Type *VT = VectorType::get(EltTy, NumSrcElts);
1207 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1208 PointerType::getUnqual(VT));
1209 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1210 if (NumSrcElts == 2)
1211 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1212 { 0, 1, 0, 1 });
1213 else
1214 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1215 { 0, 1, 2, 3, 0, 1, 2, 3 });
1216 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1217 Name.startswith("avx2.vbroadcast") ||
1218 Name.startswith("avx512.pbroadcast") ||
1219 Name.startswith("avx512.mask.broadcast.s"))) {
1220 // Replace vp?broadcasts with a vector shuffle.
1221 Value *Op = CI->getArgOperand(0);
1222 unsigned NumElts = CI->getType()->getVectorNumElements();
1223 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1224 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1225 Constant::getNullValue(MaskTy));
1226
1227 if (CI->getNumArgOperands() == 3)
1228 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1229 CI->getArgOperand(1));
1230 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1231 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1232 CI->getArgOperand(1),
1233 CI->getArgOperand(2),
1234 CI->getArgOperand(3),
1235 CI->getArgOperand(4),
1236 false);
1237 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1238 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1239 CI->getArgOperand(1),
1240 CI->getArgOperand(2),
1241 CI->getArgOperand(3),
1242 CI->getArgOperand(4),
1243 true);
1244 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1245 Name == "avx2.psll.dq")) {
1246 // 128/256-bit shift left specified in bits.
1247 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1248 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1249 Shift / 8); // Shift is in bits.
1250 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1251 Name == "avx2.psrl.dq")) {
1252 // 128/256-bit shift right specified in bits.
1253 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1254 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1255 Shift / 8); // Shift is in bits.
1256 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1257 Name == "avx2.psll.dq.bs" ||
1258 Name == "avx512.psll.dq.512")) {
1259 // 128/256/512-bit shift left specified in bytes.
1260 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1261 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1262 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1263 Name == "avx2.psrl.dq.bs" ||
1264 Name == "avx512.psrl.dq.512")) {
1265 // 128/256/512-bit shift right specified in bytes.
1266 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1267 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1268 } else if (IsX86 && (Name == "sse41.pblendw" ||
1269 Name.startswith("sse41.blendp") ||
1270 Name.startswith("avx.blend.p") ||
1271 Name == "avx2.pblendw" ||
1272 Name.startswith("avx2.pblendd."))) {
1273 Value *Op0 = CI->getArgOperand(0);
1274 Value *Op1 = CI->getArgOperand(1);
1275 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1276 VectorType *VecTy = cast<VectorType>(CI->getType());
1277 unsigned NumElts = VecTy->getNumElements();
1278
1279 SmallVector<uint32_t, 16> Idxs(NumElts);
1280 for (unsigned i = 0; i != NumElts; ++i)
1281 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1282
1283 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1284 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1285 Name == "avx2.vinserti128" ||
1286 Name.startswith("avx512.mask.insert"))) {
1287 Value *Op0 = CI->getArgOperand(0);
1288 Value *Op1 = CI->getArgOperand(1);
1289 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1290 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1291 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1292 unsigned Scale = DstNumElts / SrcNumElts;
1293
1294 // Mask off the high bits of the immediate value; hardware ignores those.
1295 Imm = Imm % Scale;
1296
1297 // Extend the second operand into a vector the size of the destination.
1298 Value *UndefV = UndefValue::get(Op1->getType());
1299 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1300 for (unsigned i = 0; i != SrcNumElts; ++i)
1301 Idxs[i] = i;
1302 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1303 Idxs[i] = SrcNumElts;
1304 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1305
1306 // Insert the second operand into the first operand.
1307
1308 // Note that there is no guarantee that instruction lowering will actually
1309 // produce a vinsertf128 instruction for the created shuffles. In
1310 // particular, the 0 immediate case involves no lane changes, so it can
1311 // be handled as a blend.
1312
1313 // Example of shuffle mask for 32-bit elements:
1314 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1315 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1316
1317 // First fill with identify mask.
1318 for (unsigned i = 0; i != DstNumElts; ++i)
1319 Idxs[i] = i;
1320 // Then replace the elements where we need to insert.
1321 for (unsigned i = 0; i != SrcNumElts; ++i)
1322 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1323 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1324
1325 // If the intrinsic has a mask operand, handle that.
1326 if (CI->getNumArgOperands() == 5)
1327 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1328 CI->getArgOperand(3));
1329 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1330 Name == "avx2.vextracti128" ||
1331 Name.startswith("avx512.mask.vextract"))) {
1332 Value *Op0 = CI->getArgOperand(0);
1333 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1334 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1335 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1336 unsigned Scale = SrcNumElts / DstNumElts;
1337
1338 // Mask off the high bits of the immediate value; hardware ignores those.
1339 Imm = Imm % Scale;
1340
1341 // Get indexes for the subvector of the input vector.
1342 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1343 for (unsigned i = 0; i != DstNumElts; ++i) {
1344 Idxs[i] = i + (Imm * DstNumElts);
1345 }
1346 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1347
1348 // If the intrinsic has a mask operand, handle that.
1349 if (CI->getNumArgOperands() == 4)
1350 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1351 CI->getArgOperand(2));
1352 } else if (!IsX86 && Name == "stackprotectorcheck") {
1353 Rep = nullptr;
1354 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1355 Name.startswith("avx512.mask.perm.di."))) {
1356 Value *Op0 = CI->getArgOperand(0);
1357 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1358 VectorType *VecTy = cast<VectorType>(CI->getType());
1359 unsigned NumElts = VecTy->getNumElements();
1360
1361 SmallVector<uint32_t, 8> Idxs(NumElts);
1362 for (unsigned i = 0; i != NumElts; ++i)
1363 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1364
1365 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1366
1367 if (CI->getNumArgOperands() == 4)
1368 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1369 CI->getArgOperand(2));
1370 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1371 Name == "sse2.pshuf.d" ||
1372 Name.startswith("avx512.mask.vpermil.p") ||
1373 Name.startswith("avx512.mask.pshuf.d."))) {
1374 Value *Op0 = CI->getArgOperand(0);
1375 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1376 VectorType *VecTy = cast<VectorType>(CI->getType());
1377 unsigned NumElts = VecTy->getNumElements();
1378 // Calculate the size of each index in the immediate.
1379 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1380 unsigned IdxMask = ((1 << IdxSize) - 1);
1381
1382 SmallVector<uint32_t, 8> Idxs(NumElts);
1383 // Look up the bits for this element, wrapping around the immediate every
1384 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
1385 // to offset by the first index of each group.
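// Illustrative examples: for sse2.pshuf.d (v4i32), IdxSize = 2 and
// Imm = 0x1B yields Idxs = <3, 2, 1, 0>; for vpermilpd on v2f64,
// IdxSize = 1 and each element uses one immediate bit to pick within its
// two-element group.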
1386 for (unsigned i = 0; i != NumElts; ++i)
1387 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1388
1389 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1390
1391 if (CI->getNumArgOperands() == 4)
1392 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1393 CI->getArgOperand(2));
1394 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1395 Name.startswith("avx512.mask.pshufl.w."))) {
1396 Value *Op0 = CI->getArgOperand(0);
1397 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1398 unsigned NumElts = CI->getType()->getVectorNumElements();
1399
1400 SmallVector<uint32_t, 16> Idxs(NumElts);
1401 for (unsigned l = 0; l != NumElts; l += 8) {
1402 for (unsigned i = 0; i != 4; ++i)
1403 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1404 for (unsigned i = 4; i != 8; ++i)
1405 Idxs[i + l] = i + l;
1406 }
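// Illustrative example: Imm = 0x1B on v8i16 reverses the low four words and
// leaves the high four unchanged: Idxs = <3, 2, 1, 0, 4, 5, 6, 7>.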
1407
1408 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1409
1410 if (CI->getNumArgOperands() == 4)
1411 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1412 CI->getArgOperand(2));
1413 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1414 Name.startswith("avx512.mask.pshufh.w."))) {
1415 Value *Op0 = CI->getArgOperand(0);
1416 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1417 unsigned NumElts = CI->getType()->getVectorNumElements();
1418
1419 SmallVector<uint32_t, 16> Idxs(NumElts);
1420 for (unsigned l = 0; l != NumElts; l += 8) {
1421 for (unsigned i = 0; i != 4; ++i)
1422 Idxs[i + l] = i + l;
1423 for (unsigned i = 0; i != 4; ++i)
1424 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1425 }
1426
1427 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1428
1429 if (CI->getNumArgOperands() == 4)
1430 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1431 CI->getArgOperand(2));
1432 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1433 Value *Op0 = CI->getArgOperand(0);
1434 Value *Op1 = CI->getArgOperand(1);
1435 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1436 unsigned NumElts = CI->getType()->getVectorNumElements();
1437
1438 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1439 unsigned HalfLaneElts = NumLaneElts / 2;
1440
1441 SmallVector<uint32_t, 16> Idxs(NumElts);
1442 for (unsigned i = 0; i != NumElts; ++i) {
1443 // Base index is the starting element of the lane.
1444 Idxs[i] = i - (i % NumLaneElts);
1445 // If we are halfway through the lane, switch to the other source.
1446 if ((i % NumLaneElts) >= HalfLaneElts)
1447 Idxs[i] += NumElts;
1448 // Now select the specific element by adding HalfLaneElts bits from
1449 // the immediate, wrapping around the immediate every 8 bits.
1450 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1451 }
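// Illustrative example: for a v4f32 shufps with Imm = 0x1B this yields
// Idxs = <3, 2, 5, 4>: two elements from the first source followed by two
// from the second.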
1452
1453 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1454
1455 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1456 CI->getArgOperand(3));
1457 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1458 Name.startswith("avx512.mask.movshdup") ||
1459 Name.startswith("avx512.mask.movsldup"))) {
1460 Value *Op0 = CI->getArgOperand(0);
1461 unsigned NumElts = CI->getType()->getVectorNumElements();
1462 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1463
1464 unsigned Offset = 0;
1465 if (Name.startswith("avx512.mask.movshdup."))
1466 Offset = 1;
1467
1468 SmallVector<uint32_t, 16> Idxs(NumElts);
1469 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1470 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1471 Idxs[i + l + 0] = i + l + Offset;
1472 Idxs[i + l + 1] = i + l + Offset;
1473 }
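// Illustrative example: on v4f32, movsldup (Offset = 0) gives
// Idxs = <0, 0, 2, 2>; movshdup (Offset = 1) gives Idxs = <1, 1, 3, 3>.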
1474
1475 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1476
1477 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1478 CI->getArgOperand(1));
1479 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1480 Name.startswith("avx512.mask.unpckl."))) {
1481 Value *Op0 = CI->getArgOperand(0);
1482 Value *Op1 = CI->getArgOperand(1);
1483 int NumElts = CI->getType()->getVectorNumElements();
1484 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1485
1486 SmallVector<uint32_t, 64> Idxs(NumElts);
1487 for (int l = 0; l != NumElts; l += NumLaneElts)
1488 for (int i = 0; i != NumLaneElts; ++i)
1489 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
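// Illustrative example: for v4i32 this interleaves the low halves of the
// two sources: Idxs = <0, 4, 1, 5>.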
1490
1491 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1492
1493 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1494 CI->getArgOperand(2));
1495 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1496 Name.startswith("avx512.mask.unpckh."))) {
1497 Value *Op0 = CI->getArgOperand(0);
1498 Value *Op1 = CI->getArgOperand(1);
1499 int NumElts = CI->getType()->getVectorNumElements();
1500 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1501
1502 SmallVector<uint32_t, 64> Idxs(NumElts);
1503 for (int l = 0; l != NumElts; l += NumLaneElts)
1504 for (int i = 0; i != NumLaneElts; ++i)
1505 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1506
1507 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1508
1509 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1510 CI->getArgOperand(2));
1511 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1512 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1513 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1514 CI->getArgOperand(2));
1515 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1516 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1517 CI->getArgOperand(1));
1518 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1519 CI->getArgOperand(2));
1520 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1521 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1522 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1523 CI->getArgOperand(2));
1524 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1525 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1526 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1527 CI->getArgOperand(2));
1528 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1529 VectorType *FTy = cast<VectorType>(CI->getType());
1530 VectorType *ITy = VectorType::getInteger(FTy);
1531 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1532 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1533 Rep = Builder.CreateBitCast(Rep, FTy);
1534 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1535 CI->getArgOperand(2));
1536 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1537 VectorType *FTy = cast<VectorType>(CI->getType());
1538 VectorType *ITy = VectorType::getInteger(FTy);
1539 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1540 Rep = Builder.CreateAnd(Rep,
1541 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1542 Rep = Builder.CreateBitCast(Rep, FTy);
1543 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1544 CI->getArgOperand(2));
1545 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1546 VectorType *FTy = cast<VectorType>(CI->getType());
1547 VectorType *ITy = VectorType::getInteger(FTy);
1548 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1549 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1550 Rep = Builder.CreateBitCast(Rep, FTy);
1551 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1552 CI->getArgOperand(2));
1553 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1554 VectorType *FTy = cast<VectorType>(CI->getType());
1555 VectorType *ITy = VectorType::getInteger(FTy);
1556 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1557 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1558 Rep = Builder.CreateBitCast(Rep, FTy);
1559 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1560 CI->getArgOperand(2));
1561 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1562 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1563 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1564 CI->getArgOperand(2));
1565 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1566 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1567 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1568 CI->getArgOperand(2));
1569 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1570 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1571 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1572 CI->getArgOperand(2));
1573 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1574 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1575 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1576 CI->getArgOperand(2));
1577 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1578 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1579 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1580 CI->getArgOperand(2));
1581 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1582 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1583 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1584 CI->getArgOperand(2));
1585 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1586 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1587 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1588 CI->getArgOperand(2));
1589 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
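// The i1 false argument tells llvm.ctlz that a zero input is defined,
// returning the element bit width, which matches LZCNT semantics.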
1590 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1591 Intrinsic::ctlz,
1592 CI->getType()),
1593 { CI->getArgOperand(0), Builder.getInt1(false) });
1594 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1595 CI->getArgOperand(1));
1596 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
1597 Name.startswith("avx512.mask.min.p"))) {
1598 bool IsMin = Name[13] == 'i';
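// Name[13] is 'a' in "avx512.mask.max.p*" and 'i' in "avx512.mask.min.p*".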
1599 VectorType *VecTy = cast<VectorType>(CI->getType());
1600 unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
1601 unsigned EltWidth = VecTy->getScalarSizeInBits();
1602 Intrinsic::ID IID;
1603 if (!IsMin && VecWidth == 128 && EltWidth == 32)
1604 IID = Intrinsic::x86_sse_max_ps;
1605 else if (!IsMin && VecWidth == 128 && EltWidth == 64)
1606 IID = Intrinsic::x86_sse2_max_pd;
1607 else if (!IsMin && VecWidth == 256 && EltWidth == 32)
1608 IID = Intrinsic::x86_avx_max_ps_256;
1609 else if (!IsMin && VecWidth == 256 && EltWidth == 64)
1610 IID = Intrinsic::x86_avx_max_pd_256;
1611 else if (IsMin && VecWidth == 128 && EltWidth == 32)
1612 IID = Intrinsic::x86_sse_min_ps;
1613 else if (IsMin && VecWidth == 128 && EltWidth == 64)
1614 IID = Intrinsic::x86_sse2_min_pd;
1615 else if (IsMin && VecWidth == 256 && EltWidth == 32)
1616 IID = Intrinsic::x86_avx_min_ps_256;
1617 else if (IsMin && VecWidth == 256 && EltWidth == 64)
1618 IID = Intrinsic::x86_avx_min_pd_256;
1619 else
1620 llvm_unreachable("Unexpected intrinsic");
1621
1622 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1623 { CI->getArgOperand(0), CI->getArgOperand(1) });
1624 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1625 CI->getArgOperand(2));
1626 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1627 VectorType *VecTy = cast<VectorType>(CI->getType());
1628 Intrinsic::ID IID;
1629 if (VecTy->getPrimitiveSizeInBits() == 128)
1630 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1631 else if (VecTy->getPrimitiveSizeInBits() == 256)
1632 IID = Intrinsic::x86_avx2_pshuf_b;
1633 else if (VecTy->getPrimitiveSizeInBits() == 512)
1634 IID = Intrinsic::x86_avx512_pshuf_b_512;
1635 else
1636 llvm_unreachable("Unexpected intrinsic");
1637
1638 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1639 { CI->getArgOperand(0), CI->getArgOperand(1) });
1640 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1641 CI->getArgOperand(2));
1642 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1643 Name.startswith("avx512.mask.pmulu.dq."))) {
1644 bool IsUnsigned = Name[16] == 'u';
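// Name[16] is 'u' in "avx512.mask.pmulu.dq." and '.' in
// "avx512.mask.pmul.dq.".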
1645 VectorType *VecTy = cast<VectorType>(CI->getType());
1646 Intrinsic::ID IID;
1647 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1648 IID = Intrinsic::x86_sse41_pmuldq;
1649 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1650 IID = Intrinsic::x86_avx2_pmul_dq;
1651 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1652 IID = Intrinsic::x86_avx512_pmul_dq_512;
1653 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1654 IID = Intrinsic::x86_sse2_pmulu_dq;
1655 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1656 IID = Intrinsic::x86_avx2_pmulu_dq;
1657 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1658 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1659 else
1660 llvm_unreachable("Unexpected intrinsic");
1661
1662 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1663 { CI->getArgOperand(0), CI->getArgOperand(1) });
1664 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1665 CI->getArgOperand(2));
1666 } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
1667 bool IsUnsigned = Name[16] == 'u';
1668 bool IsDW = Name[18] == 'd';
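// Name[16] selects signed ('s') vs. unsigned ('u') saturation, and Name[18]
// distinguishes word-to-byte ('w') from dword-to-word ('d') packs, e.g.
// "avx512.mask.packsswb." vs. "avx512.mask.packusdw.".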
1669 VectorType *VecTy = cast<VectorType>(CI->getType());
1670 Intrinsic::ID IID;
1671 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1672 IID = Intrinsic::x86_sse2_packsswb_128;
1673 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1674 IID = Intrinsic::x86_avx2_packsswb;
1675 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1676 IID = Intrinsic::x86_avx512_packsswb_512;
1677 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1678 IID = Intrinsic::x86_sse2_packssdw_128;
1679 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1680 IID = Intrinsic::x86_avx2_packssdw;
1681 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1682 IID = Intrinsic::x86_avx512_packssdw_512;
1683 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1684 IID = Intrinsic::x86_sse2_packuswb_128;
1685 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1686 IID = Intrinsic::x86_avx2_packuswb;
1687 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1688 IID = Intrinsic::x86_avx512_packuswb_512;
1689 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1690 IID = Intrinsic::x86_sse41_packusdw;
1691 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1692 IID = Intrinsic::x86_avx2_packusdw;
1693 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1694 IID = Intrinsic::x86_avx512_packusdw_512;
1695 else
1696 llvm_unreachable("Unexpected intrinsic");
1697
1698 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1699 { CI->getArgOperand(0), CI->getArgOperand(1) });
1700 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1701 CI->getArgOperand(2));
1702 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1703 bool IsImmediate = Name[16] == 'i' ||
1704 (Name.size() > 18 && Name[18] == 'i');
1705 bool IsVariable = Name[16] == 'v';
1706 char Size = Name[16] == '.' ? Name[17] :
1707 Name[17] == '.' ? Name[18] :
1708 Name[18] == '.' ? Name[19] :
1709 Name[20];
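// Illustrative examples of the name layouts parsed above:
//   avx512.mask.psll.d.128  -> Name[16] == '.', Size = Name[17] ('d')
//   avx512.mask.pslli.d     -> Name[17] == '.', Size = Name[18] ('d')
//   avx512.mask.psllv2.di   -> Name[18] == '.', Size = Name[19] ('d')
//   avx512.mask.psllv16.hi  -> Size = Name[20] ('h')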
1710
1711 Intrinsic::ID IID;
1712 if (IsVariable && Name[17] != '.') {
1713 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1714 IID = Intrinsic::x86_avx2_psllv_q;
1715 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1716 IID = Intrinsic::x86_avx2_psllv_q_256;
1717 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1718 IID = Intrinsic::x86_avx2_psllv_d;
1719 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1720 IID = Intrinsic::x86_avx2_psllv_d_256;
1721 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1722 IID = Intrinsic::x86_avx512_psllv_w_128;
1723 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1724 IID = Intrinsic::x86_avx512_psllv_w_256;
1725 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1726 IID = Intrinsic::x86_avx512_psllv_w_512;
1727 else
1728 llvm_unreachable("Unexpected size");
1729 } else if (Name.endswith(".128")) {
1730 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1731 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1732 : Intrinsic::x86_sse2_psll_d;
1733 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1734 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1735 : Intrinsic::x86_sse2_psll_q;
1736 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1737 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1738 : Intrinsic::x86_sse2_psll_w;
1739 else
1740 llvm_unreachable("Unexpected size");
1741 } else if (Name.endswith(".256")) {
1742 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1743 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1744 : Intrinsic::x86_avx2_psll_d;
1745 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1746 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1747 : Intrinsic::x86_avx2_psll_q;
1748 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1749 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1750 : Intrinsic::x86_avx2_psll_w;
1751 else
1752 llvm_unreachable("Unexpected size");
1753 } else {
1754 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1755 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1756 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1757 Intrinsic::x86_avx512_psll_d_512;
1758 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1759 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1760 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1761 Intrinsic::x86_avx512_psll_q_512;
1762 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1763 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1764 : Intrinsic::x86_avx512_psll_w_512;
1765 else
1766 llvm_unreachable("Unexpected size");
1767 }
1768
1769 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1770 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1771 bool IsImmediate = Name[16] == 'i' ||
1772 (Name.size() > 18 && Name[18] == 'i');
1773 bool IsVariable = Name[16] == 'v';
1774 char Size = Name[16] == '.' ? Name[17] :
1775 Name[17] == '.' ? Name[18] :
1776 Name[18] == '.' ? Name[19] :
1777 Name[20];
1778
1779 Intrinsic::ID IID;
1780 if (IsVariable && Name[17] != '.') {
1781 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1782 IID = Intrinsic::x86_avx2_psrlv_q;
1783 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1784 IID = Intrinsic::x86_avx2_psrlv_q_256;
1785 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1786 IID = Intrinsic::x86_avx2_psrlv_d;
1787 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1788 IID = Intrinsic::x86_avx2_psrlv_d_256;
1789 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1790 IID = Intrinsic::x86_avx512_psrlv_w_128;
1791 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1792 IID = Intrinsic::x86_avx512_psrlv_w_256;
1793 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1794 IID = Intrinsic::x86_avx512_psrlv_w_512;
1795 else
1796 llvm_unreachable("Unexpected size");
1797 } else if (Name.endswith(".128")) {
1798 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1799 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1800 : Intrinsic::x86_sse2_psrl_d;
1801 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1802 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1803 : Intrinsic::x86_sse2_psrl_q;
1804 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1805 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1806 : Intrinsic::x86_sse2_psrl_w;
1807 else
1808 llvm_unreachable("Unexpected size");
1809 } else if (Name.endswith(".256")) {
1810 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1811 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1812 : Intrinsic::x86_avx2_psrl_d;
1813 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1814 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1815 : Intrinsic::x86_avx2_psrl_q;
1816 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1817 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1818 : Intrinsic::x86_avx2_psrl_w;
1819 else
1820 llvm_unreachable("Unexpected size");
1821 } else {
1822 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
1823 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1824 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1825 Intrinsic::x86_avx512_psrl_d_512;
1826 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
1827 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1828 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1829 Intrinsic::x86_avx512_psrl_q_512;
1830 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
1831 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1832 : Intrinsic::x86_avx512_psrl_w_512;
1833 else
1834 llvm_unreachable("Unexpected size");
1835 }
1836
1837 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1838 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1839 bool IsImmediate = Name[16] == 'i' ||
1840 (Name.size() > 18 && Name[18] == 'i');
1841 bool IsVariable = Name[16] == 'v';
1842 char Size = Name[16] == '.' ? Name[17] :
1843 Name[17] == '.' ? Name[18] :
1844 Name[18] == '.' ? Name[19] :
1845 Name[20];
1846
1847 Intrinsic::ID IID;
1848 if (IsVariable && Name[17] != '.') {
1849 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
1850 IID = Intrinsic::x86_avx2_psrav_d;
1851 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
1852 IID = Intrinsic::x86_avx2_psrav_d_256;
1853 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
1854 IID = Intrinsic::x86_avx512_psrav_w_128;
1855 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
1856 IID = Intrinsic::x86_avx512_psrav_w_256;
1857 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
1858 IID = Intrinsic::x86_avx512_psrav_w_512;
1859 else
1860 llvm_unreachable("Unexpected size");
1861 } else if (Name.endswith(".128")) {
1862 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
1863 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
1864 : Intrinsic::x86_sse2_psra_d;
1865 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
1866 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
1867 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
1868 Intrinsic::x86_avx512_psra_q_128;
1869 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
1870 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
1871 : Intrinsic::x86_sse2_psra_w;
1872 else
1873 llvm_unreachable("Unexpected size");
1874 } else if (Name.endswith(".256")) {
1875 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
1876 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
1877 : Intrinsic::x86_avx2_psra_d;
1878 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
1879 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
1880 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
1881 Intrinsic::x86_avx512_psra_q_256;
1882 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
1883 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
1884 : Intrinsic::x86_avx2_psra_w;
1885 else
1886 llvm_unreachable("Unexpected size");
1887 } else {
1888 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
1889 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
1890 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
1891 Intrinsic::x86_avx512_psra_d_512;
1892 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
1893 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
1894 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
1895 Intrinsic::x86_avx512_psra_q_512;
1896 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
1897 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
1898 : Intrinsic::x86_avx512_psra_w_512;
1899 else
1900 llvm_unreachable("Unexpected size");
1901 }
1902
1903 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1904 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
1905 Rep = upgradeMaskedMove(Builder, *CI);
1906 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
1907 Rep = UpgradeMaskToInt(Builder, *CI);
1908 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
1909 Intrinsic::ID IID;
1910 if (Name.endswith("ps.128"))
1911 IID = Intrinsic::x86_avx_vpermilvar_ps;
1912 else if (Name.endswith("pd.128"))
1913 IID = Intrinsic::x86_avx_vpermilvar_pd;
1914 else if (Name.endswith("ps.256"))
1915 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1916 else if (Name.endswith("pd.256"))
1917 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1918 else if (Name.endswith("ps.512"))
1919 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1920 else if (Name.endswith("pd.512"))
1921 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1922 else
1923 llvm_unreachable("Unexpected vpermilvar intrinsic");
1924
1925 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
1926 Rep = Builder.CreateCall(Intrin,
1927 { CI->getArgOperand(0), CI->getArgOperand(1) });
1928 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1929 CI->getArgOperand(2));
1930 } else if (IsX86 && Name.endswith(".movntdqa")) {
1931 Module *M = F->getParent();
1932 MDNode *Node = MDNode::get(
1933 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
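// An i32 1 !nontemporal tag on the load lets the backend emit a
// non-temporal instruction such as MOVNTDQA where the target supports it.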
1934
1935 Value *Ptr = CI->getArgOperand(0);
1936 VectorType *VTy = cast<VectorType>(CI->getType());
1937
1938 // Convert the type of the pointer to a pointer to the stored type.
1939 Value *BC =
1940 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
1941 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
1942 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
1943 Rep = LI;
1944 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
1945 Value *Arg = CI->getArgOperand(0);
1946 Value *Neg = Builder.CreateNeg(Arg, "neg");
1947 Value *Cmp = Builder.CreateICmpSGE(
1948 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
1949 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
1950 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
1951 Name == "max.ui" || Name == "max.ull")) {
1952 Value *Arg0 = CI->getArgOperand(0);
1953 Value *Arg1 = CI->getArgOperand(1);
1954 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1955 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
1956 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
1957 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
1958 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
1959 Name == "min.ui" || Name == "min.ull")) {
1960 Value *Arg0 = CI->getArgOperand(0);
1961 Value *Arg1 = CI->getArgOperand(1);
1962 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1963 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
1964 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
1965 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
1966 } else if (IsNVVM && Name == "clz.ll") {
1967 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
1968 Value *Arg = CI->getArgOperand(0);
1969 Value *Ctlz = Builder.CreateCall(
1970 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
1971 {Arg->getType()}),
1972 {Arg, Builder.getFalse()}, "ctlz");
1973 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
1974 } else if (IsNVVM && Name == "popc.ll") {
1975 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
1977 Value *Arg = CI->getArgOperand(0);
1978 Value *Popc = Builder.CreateCall(
1979 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
1980 {Arg->getType()}),
1981 Arg, "ctpop");
1982 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
1983 } else if (IsNVVM && Name == "h2f") {
1984 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
1985 F->getParent(), Intrinsic::convert_from_fp16,
1986 {Builder.getFloatTy()}),
1987 CI->getArgOperand(0), "h2f");
1988 } else {
1989 llvm_unreachable("Unknown function for CallInst upgrade.");
1990 }
1991
1992 if (Rep)
1993 CI->replaceAllUsesWith(Rep);
1994 CI->eraseFromParent();
1995 return;
1996 }
1997
1998 CallInst *NewCall = nullptr;
1999 switch (NewFn->getIntrinsicID()) {
2000 default: {
2001 // Handle a generic mangling change, but nothing else.
2002 assert(
2003 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
2004 "Unknown function for CallInst upgrade and isn't just a name change");
2005 CI->setCalledFunction(NewFn);
2006 return;
2007 }
2008
2009 case Intrinsic::arm_neon_vld1:
2010 case Intrinsic::arm_neon_vld2:
2011 case Intrinsic::arm_neon_vld3:
2012 case Intrinsic::arm_neon_vld4:
2013 case Intrinsic::arm_neon_vld2lane:
2014 case Intrinsic::arm_neon_vld3lane:
2015 case Intrinsic::arm_neon_vld4lane:
2016 case Intrinsic::arm_neon_vst1:
2017 case Intrinsic::arm_neon_vst2:
2018 case Intrinsic::arm_neon_vst3:
2019 case Intrinsic::arm_neon_vst4:
2020 case Intrinsic::arm_neon_vst2lane:
2021 case Intrinsic::arm_neon_vst3lane:
2022 case Intrinsic::arm_neon_vst4lane: {
2023 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2024 CI->arg_operands().end());
2025 NewCall = Builder.CreateCall(NewFn, Args);
2026 break;
2027 }
2028
2029 case Intrinsic::bitreverse:
2030 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2031 break;
2032
2033 case Intrinsic::ctlz:
2034 case Intrinsic::cttz:
2035 assert(CI->getNumArgOperands() == 1 &&
2036 "Mismatch between function args and call args");
2037 NewCall =
2038 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
2039 break;
2040
2041 case Intrinsic::objectsize: {
2042 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
2043 ? Builder.getFalse()
2044 : CI->getArgOperand(2);
2045 NewCall = Builder.CreateCall(
2046 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
2047 break;
2048 }
2049
2050 case Intrinsic::ctpop:
2051 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2052 break;
2053
2054 case Intrinsic::convert_from_fp16:
2055 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2056 break;
2057
2058 case Intrinsic::x86_xop_vfrcz_ss:
2059 case Intrinsic::x86_xop_vfrcz_sd:
2060 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
2061 break;
2062
2063 case Intrinsic::x86_xop_vpermil2pd:
2064 case Intrinsic::x86_xop_vpermil2ps:
2065 case Intrinsic::x86_xop_vpermil2pd_256:
2066 case Intrinsic::x86_xop_vpermil2ps_256: {
2067 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2068 CI->arg_operands().end());
2069 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
2070 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
2071 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
2072 NewCall = Builder.CreateCall(NewFn, Args);
2073 break;
2074 }
2075
2076 case Intrinsic::x86_sse41_ptestc:
2077 case Intrinsic::x86_sse41_ptestz:
2078 case Intrinsic::x86_sse41_ptestnzc: {
2079 // The arguments for these intrinsics used to be v4f32 but changed to
2080 // v2i64. This is purely a no-op, since these are bitwise intrinsics, so
2081 // the only thing required is a bitcast of both arguments.
2082 // First, check the arguments have the old type.
2083 Value *Arg0 = CI->getArgOperand(0);
2084 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
2085 return;
2086
2087 // Old intrinsic, add bitcasts
2088 Value *Arg1 = CI->getArgOperand(1);
2089
2090 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
2091
2092 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
2093 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2094
2095 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
2096 break;
2097 }
2098
2099 case Intrinsic::x86_sse41_insertps:
2100 case Intrinsic::x86_sse41_dppd:
2101 case Intrinsic::x86_sse41_dpps:
2102 case Intrinsic::x86_sse41_mpsadbw:
2103 case Intrinsic::x86_avx_dp_ps_256:
2104 case Intrinsic::x86_avx2_mpsadbw: {
2105 // Need to truncate the last argument from i32 to i8 -- this argument models
2106 // an inherently 8-bit immediate operand to these x86 instructions.
2107 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2108 CI->arg_operands().end());
2109
2110 // Replace the last argument with a trunc.
2111 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
2112 NewCall = Builder.CreateCall(NewFn, Args);
2113 break;
2114 }
2115
2116 case Intrinsic::thread_pointer: {
2117 NewCall = Builder.CreateCall(NewFn, {});
2118 break;
2119 }
2120
2121 case Intrinsic::invariant_start:
2122 case Intrinsic::invariant_end:
2123 case Intrinsic::masked_load:
2124 case Intrinsic::masked_store:
2125 case Intrinsic::masked_gather:
2126 case Intrinsic::masked_scatter: {
2127 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2128 CI->arg_operands().end());
2129 NewCall = Builder.CreateCall(NewFn, Args);
2130 break;
2131 }
2132 }
2133 assert(NewCall && "Should have either set this variable or returned through "
2134 "the default case");
2135 std::string Name = CI->getName();
2136 if (!Name.empty()) {
2137 CI->setName(Name + ".old");
2138 NewCall->setName(Name);
2139 }
2140 CI->replaceAllUsesWith(NewCall);
2141 CI->eraseFromParent();
2142}
2143
2144void llvm::UpgradeCallsToIntrinsic(Function *F) {
2145 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
2146
2147 // Check if this function should be upgraded and get the replacement function
2148 // if there is one.
2149 Function *NewFn;
2150 if (UpgradeIntrinsicFunction(F, NewFn)) {
2151 // Replace all users of the old function with the new function or new
2152 // instructions. This is not a range loop because the call is deleted.
2153 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
2154 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
2155 UpgradeIntrinsicCall(CI, NewFn);
2156
2157 // Remove old function, no longer used, from the module.
2158 F->eraseFromParent();
2159 }
2160}
2161
2162MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
2163 // Check if the tag uses struct-path aware TBAA format.
2164 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
2165 return &MD;
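// Otherwise, upgrade the old scalar TBAA format. Illustrative example: an
// old two-operand tag !{!"int", !root} becomes the struct-path tag
// !{!old, !old, i64 0}, with the old node serving as both base and access
// type.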
2166
2167 auto &Context = MD.getContext();
2168 if (MD.getNumOperands() == 3) {
2169 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
2170 MDNode *ScalarType = MDNode::get(Context, Elts);
2171 // Create an MDNode <ScalarType, ScalarType, offset 0, const>.
2172 Metadata *Elts2[] = {ScalarType, ScalarType,
2173 ConstantAsMetadata::get(
2174 Constant::getNullValue(Type::getInt64Ty(Context))),
2175 MD.getOperand(2)};
2176 return MDNode::get(Context, Elts2);
2177 }
2178 // Create an MDNode <MD, MD, offset 0>.
2179 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
2180 Type::getInt64Ty(Context)))};
2181 return MDNode::get(Context, Elts);
2182}
2183
2184Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
2185 Instruction *&Temp) {
2186 if (Opc != Instruction::BitCast)
2187 return nullptr;
2188
2189 Temp = nullptr;
2190 Type *SrcTy = V->getType();
2191 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2192 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2193 LLVMContext &Context = V->getContext();
2194
2195 // We have no information about the target data layout, so we assume that
2196 // the maximum pointer size is 64 bits.
2197 Type *MidTy = Type::getInt64Ty(Context);
2198 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
2199
2200 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
2201 }
2202
2203 return nullptr;
2204}
2205
2206Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
2207 if (Opc != Instruction::BitCast)
2208 return nullptr;
2209
2210 Type *SrcTy = C->getType();
2211 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2212 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2213 LLVMContext &Context = C->getContext();
2214
2215 // We have no information about the target data layout, so we assume that
2216 // the maximum pointer size is 64 bits.
2217 Type *MidTy = Type::getInt64Ty(Context);
2218
2219 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
2220 DestTy);
2221 }
2222
2223 return nullptr;
2224}
2225
2226 /// Check the debug info version number; if it is outdated, drop the debug
2227 /// info. Return true if the module is modified.
2228bool llvm::UpgradeDebugInfo(Module &M) {
2229 unsigned Version = getDebugMetadataVersionFromModule(M);
2230 if (Version == DEBUG_METADATA_VERSION)
2231 return false;
2232
2233 bool RetCode = StripDebugInfo(M);
2234 if (RetCode) {
2235 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
2236 M.getContext().diagnose(DiagVersion);
2237 }
2238 return RetCode;
2239}
2240
2241bool llvm::UpgradeModuleFlags(Module &M) {
2242 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
2243 if (!ModFlags)
2244 return false;
2245
2246 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
2247 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
2248 MDNode *Op = ModFlags->getOperand(I);
2249 if (Op->getNumOperands() != 3)
2250 continue;
2251 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
2252 if (!ID)
2253 continue;
2254 if (ID->getString() == "Objective-C Image Info Version")
2255 HasObjCFlag = true;
2256 if (ID->getString() == "Objective-C Class Properties")
2257 HasClassProperties = true;
2258 // Upgrade PIC/PIE module flags. The module flag behavior for these two
2259 // fields was Error and is now Max.
2260 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
2261 if (auto *Behavior =
2262 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
2263 if (Behavior->getLimitedValue() == Module::Error) {
2264 Type *Int32Ty = Type::getInt32Ty(M.getContext());
2265 Metadata *Ops[3] = {
2266 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
2267 MDString::get(M.getContext(), ID->getString()),
2268 Op->getOperand(2)};
2269 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
2270 Changed = true;
2271 }
2272 }
2273 }
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the auto-upgrade helper functions.
11// This is where deprecated IR intrinsics and other IR features are updated to
12// current specifications.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/IR/AutoUpgrade.h"
17#include "llvm/ADT/StringSwitch.h"
18#include "llvm/IR/CFG.h"
19#include "llvm/IR/CallSite.h"
20#include "llvm/IR/Constants.h"
21#include "llvm/IR/DIBuilder.h"
22#include "llvm/IR/DebugInfo.h"
23#include "llvm/IR/DiagnosticInfo.h"
24#include "llvm/IR/Function.h"
25#include "llvm/IR/IRBuilder.h"
26#include "llvm/IR/Instruction.h"
27#include "llvm/IR/IntrinsicInst.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/IR/Module.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/Regex.h"
32#include <cstring>
33using namespace llvm;
34
35static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
36
37// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
38// changed their type from v4f32 to v2i64.
39static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
40 Function *&NewFn) {
41 // Check whether this is an old version of the function, which received
42 // v4f32 arguments.
43 Type *Arg0Type = F->getFunctionType()->getParamType(0);
44 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
45 return false;
46
47 // Yes, it's old, replace it with new version.
48 rename(F);
49 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
50 return true;
51}
52
53// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
54// arguments have changed their type from i32 to i8.
55static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
56 Function *&NewFn) {
57 // Check that the last argument is an i32.
58 Type *LastArgType = F->getFunctionType()->getParamType(
59 F->getFunctionType()->getNumParams() - 1);
60 if (!LastArgType->isIntegerTy(32))
61 return false;
62
63 // Move this function aside and map down.
64 rename(F);
65 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
66 return true;
67}
68
69static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
70 // All of the intrinsics matches below should be marked with which llvm
71 // version started autoupgrading them. At some point in the future we would
72 // like to use this information to remove upgrade code for some older
73 // intrinsics. It is currently undecided how we will determine that future
74 // point.
75 if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
76 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
77 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
78 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
79 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
80 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
81 Name == "sse.add.ss" || // Added in 4.0
82 Name == "sse2.add.sd" || // Added in 4.0
83 Name == "sse.sub.ss" || // Added in 4.0
84 Name == "sse2.sub.sd" || // Added in 4.0
85 Name == "sse.mul.ss" || // Added in 4.0
86 Name == "sse2.mul.sd" || // Added in 4.0
87 Name == "sse.div.ss" || // Added in 4.0
88 Name == "sse2.div.sd" || // Added in 4.0
89 Name == "sse41.pmaxsb" || // Added in 3.9
90 Name == "sse2.pmaxs.w" || // Added in 3.9
91 Name == "sse41.pmaxsd" || // Added in 3.9
92 Name == "sse2.pmaxu.b" || // Added in 3.9
93 Name == "sse41.pmaxuw" || // Added in 3.9
94 Name == "sse41.pmaxud" || // Added in 3.9
95 Name == "sse41.pminsb" || // Added in 3.9
96 Name == "sse2.pmins.w" || // Added in 3.9
97 Name == "sse41.pminsd" || // Added in 3.9
98 Name == "sse2.pminu.b" || // Added in 3.9
99 Name == "sse41.pminuw" || // Added in 3.9
100 Name == "sse41.pminud" || // Added in 3.9
101 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
102 Name.startswith("avx2.pmax") || // Added in 3.9
103 Name.startswith("avx2.pmin") || // Added in 3.9
104 Name.startswith("avx512.mask.pmax") || // Added in 4.0
105 Name.startswith("avx512.mask.pmin") || // Added in 4.0
106 Name.startswith("avx2.vbroadcast") || // Added in 3.8
107 Name.startswith("avx2.pbroadcast") || // Added in 3.8
108 Name.startswith("avx.vpermil.") || // Added in 3.1
109 Name.startswith("sse2.pshuf") || // Added in 3.9
110 Name.startswith("avx512.pbroadcast") || // Added in 3.9
111 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
112 Name.startswith("avx512.mask.movddup") || // Added in 3.9
113 Name.startswith("avx512.mask.movshdup") || // Added in 3.9
114 Name.startswith("avx512.mask.movsldup") || // Added in 3.9
115 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
116 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
117 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
118 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
119 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
120 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
121 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
122 Name.startswith("avx512.mask.punpckl") || // Added in 3.9
123 Name.startswith("avx512.mask.punpckh") || // Added in 3.9
124 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
125 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
126 Name.startswith("avx512.mask.pand.") || // Added in 3.9
127 Name.startswith("avx512.mask.pandn.") || // Added in 3.9
128 Name.startswith("avx512.mask.por.") || // Added in 3.9
129 Name.startswith("avx512.mask.pxor.") || // Added in 3.9
130 Name.startswith("avx512.mask.and.") || // Added in 3.9
131 Name.startswith("avx512.mask.andn.") || // Added in 3.9
132 Name.startswith("avx512.mask.or.") || // Added in 3.9
133 Name.startswith("avx512.mask.xor.") || // Added in 3.9
134 Name.startswith("avx512.mask.padd.") || // Added in 4.0
135 Name.startswith("avx512.mask.psub.") || // Added in 4.0
136 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
137 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
138 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
139 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
140 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
141 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
142 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
143 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
144 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
145 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
146 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
147 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
148 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
149 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
150 Name == "avx512.mask.add.pd.128" || // Added in 4.0
151 Name == "avx512.mask.add.pd.256" || // Added in 4.0
152 Name == "avx512.mask.add.ps.128" || // Added in 4.0
153 Name == "avx512.mask.add.ps.256" || // Added in 4.0
154 Name == "avx512.mask.div.pd.128" || // Added in 4.0
155 Name == "avx512.mask.div.pd.256" || // Added in 4.0
156 Name == "avx512.mask.div.ps.128" || // Added in 4.0
157 Name == "avx512.mask.div.ps.256" || // Added in 4.0
158 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
159 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
160 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
161 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
162 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
163 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
164 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
165 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
166 Name == "avx512.mask.max.pd.128" || // Added in 5.0
167 Name == "avx512.mask.max.pd.256" || // Added in 5.0
168 Name == "avx512.mask.max.ps.128" || // Added in 5.0
169 Name == "avx512.mask.max.ps.256" || // Added in 5.0
170 Name == "avx512.mask.min.pd.128" || // Added in 5.0
171 Name == "avx512.mask.min.pd.256" || // Added in 5.0
172 Name == "avx512.mask.min.ps.128" || // Added in 5.0
173 Name == "avx512.mask.min.ps.256" || // Added in 5.0
174 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
175 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
176 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
177 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
178 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
179 Name.startswith("avx512.mask.psra.q") || // Added in 4.0
180 Name.startswith("avx512.mask.psra.w") || // Added in 4.0
181 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
182 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
183 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
184 Name.startswith("avx512.mask.pslli") || // Added in 4.0
185 Name.startswith("avx512.mask.psrai") || // Added in 4.0
186 Name.startswith("avx512.mask.psrli") || // Added in 4.0
187 Name.startswith("avx512.mask.psllv") || // Added in 4.0
188 Name.startswith("avx512.mask.psrav") || // Added in 4.0
189 Name.startswith("avx512.mask.psrlv") || // Added in 4.0
190 Name.startswith("sse41.pmovsx") || // Added in 3.8
191 Name.startswith("sse41.pmovzx") || // Added in 3.9
192 Name.startswith("avx2.pmovsx") || // Added in 3.9
193 Name.startswith("avx2.pmovzx") || // Added in 3.9
194 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
195 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
196 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
197 Name == "sse2.cvtdq2pd" || // Added in 3.9
198 Name == "sse2.cvtps2pd" || // Added in 3.9
199 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
200 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
201 Name.startswith("avx.vinsertf128.") || // Added in 3.7
202 Name == "avx2.vinserti128" || // Added in 3.7
203 Name.startswith("avx512.mask.insert") || // Added in 4.0
204 Name.startswith("avx.vextractf128.") || // Added in 3.7
205 Name == "avx2.vextracti128" || // Added in 3.7
206 Name.startswith("avx512.mask.vextract") || // Added in 4.0
207 Name.startswith("sse4a.movnt.") || // Added in 3.9
208 Name.startswith("avx.movnt.") || // Added in 3.2
209 Name.startswith("avx512.storent.") || // Added in 3.9
210 Name == "sse41.movntdqa" || // Added in 5.0
211 Name == "avx2.movntdqa" || // Added in 5.0
212 Name == "avx512.movntdqa" || // Added in 5.0
213 Name == "sse2.storel.dq" || // Added in 3.9
214 Name.startswith("sse.storeu.") || // Added in 3.9
215 Name.startswith("sse2.storeu.") || // Added in 3.9
216 Name.startswith("avx.storeu.") || // Added in 3.9
217 Name.startswith("avx512.mask.storeu.") || // Added in 3.9
218 Name.startswith("avx512.mask.store.p") || // Added in 3.9
219 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
220 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
221 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
222 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
223 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
224 Name.startswith("avx512.mask.load.") || // Added in 3.9
225 Name == "sse42.crc32.64.8" || // Added in 3.4
226 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
227 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
228 Name.startswith("avx512.mask.valign.") || // Added in 4.0
229 Name.startswith("sse2.psll.dq") || // Added in 3.7
230 Name.startswith("sse2.psrl.dq") || // Added in 3.7
231 Name.startswith("avx2.psll.dq") || // Added in 3.7
232 Name.startswith("avx2.psrl.dq") || // Added in 3.7
233 Name.startswith("avx512.psll.dq") || // Added in 3.9
234 Name.startswith("avx512.psrl.dq") || // Added in 3.9
235 Name == "sse41.pblendw" || // Added in 3.7
236 Name.startswith("sse41.blendp") || // Added in 3.7
237 Name.startswith("avx.blend.p") || // Added in 3.7
238 Name == "avx2.pblendw" || // Added in 3.7
239 Name.startswith("avx2.pblendd.") || // Added in 3.7
240 Name.startswith("avx.vbroadcastf128") || // Added in 4.0
241 Name == "avx2.vbroadcasti128" || // Added in 3.7
242 Name == "xop.vpcmov" || // Added in 3.8
243 Name == "xop.vpcmov.256" || // Added in 5.0
244 Name.startswith("avx512.mask.move.s") || // Added in 4.0
245 Name.startswith("avx512.cvtmask2") || // Added in 5.0
246 (Name.startswith("xop.vpcom") && // Added in 3.2
247 F->arg_size() == 2))
248 return true;
249
250 return false;
251}
252
253static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
254 Function *&NewFn) {
255 // Only handle intrinsics that start with "x86.".
256 if (!Name.startswith("x86."))
257 return false;
258 // Remove "x86." prefix.
259 Name = Name.substr(4);
260
261 if (ShouldUpgradeX86Intrinsic(F, Name)) {
262 NewFn = nullptr;
263 return true;
264 }
265
266 // SSE4.1 ptest functions may have an old signature.
267 if (Name.startswith("sse41.ptest")) { // Added in 3.2
268 if (Name.substr(11) == "c")
269 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
270 if (Name.substr(11) == "z")
271 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
272 if (Name.substr(11) == "nzc")
273 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
274 }
275 // Several blend and other instructions with masks used the wrong number of
276 // bits.
277 if (Name == "sse41.insertps") // Added in 3.6
278 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
279 NewFn);
280 if (Name == "sse41.dppd") // Added in 3.6
281 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
282 NewFn);
283 if (Name == "sse41.dpps") // Added in 3.6
284 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
285 NewFn);
286 if (Name == "sse41.mpsadbw") // Added in 3.6
287 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
288 NewFn);
289 if (Name == "avx.dp.ps.256") // Added in 3.6
290 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
291 NewFn);
292 if (Name == "avx2.mpsadbw") // Added in 3.6
293 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
294 NewFn);
295
296 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
297 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
298 rename(F);
299 NewFn = Intrinsic::getDeclaration(F->getParent(),
300 Intrinsic::x86_xop_vfrcz_ss);
301 return true;
302 }
303 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
304 rename(F);
305 NewFn = Intrinsic::getDeclaration(F->getParent(),
306 Intrinsic::x86_xop_vfrcz_sd);
307 return true;
308 }
309 // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
310 if (Name.startswith("xop.vpermil2")) { // Added in 3.9
311 auto Idx = F->getFunctionType()->getParamType(2);
312 if (Idx->isFPOrFPVectorTy()) {
313 rename(F);
314 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
315 unsigned EltSize = Idx->getScalarSizeInBits();
316 Intrinsic::ID Permil2ID;
317 if (EltSize == 64 && IdxSize == 128)
318 Permil2ID = Intrinsic::x86_xop_vpermil2pd;
319 else if (EltSize == 32 && IdxSize == 128)
320 Permil2ID = Intrinsic::x86_xop_vpermil2ps;
321 else if (EltSize == 64 && IdxSize == 256)
322 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
323 else
324 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
325 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
326 return true;
327 }
328 }
329
330 return false;
331}
332
333static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
334 assert(F && "Illegal to upgrade a non-existent Function.");
335
336 // Quickly eliminate it, if it's not a candidate.
337 StringRef Name = F->getName();
338 if (Name.size() <= 8 || !Name.startswith("llvm."))
339 return false;
340 Name = Name.substr(5); // Strip off "llvm."
341
342 switch (Name[0]) {
343 default: break;
344 case 'a': {
345 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
346 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
347 F->arg_begin()->getType());
348 return true;
349 }
350 if (Name.startswith("arm.neon.vclz")) {
351 Type* args[2] = {
352 F->arg_begin()->getType(),
353 Type::getInt1Ty(F->getContext())
354 };
355 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
356 // the end of the name. Change name from llvm.arm.neon.vclz.* to
357 // llvm.ctlz.*
358 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
359 NewFn = Function::Create(fType, F->getLinkage(),
360 "llvm.ctlz." + Name.substr(14), F->getParent());
361 return true;
362 }
363 if (Name.startswith("arm.neon.vcnt")) {
364 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
365 F->arg_begin()->getType());
366 return true;
367 }
368 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
369 if (vldRegex.match(Name)) {
370 auto fArgs = F->getFunctionType()->params();
371 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
372 // Can't use Intrinsic::getDeclaration here as the return types might
373 // then only be structurally equal.
374 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
375 NewFn = Function::Create(fType, F->getLinkage(),
376 "llvm." + Name + ".p0i8", F->getParent());
377 return true;
378 }
379 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
380 if (vstRegex.match(Name)) {
381 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
382 Intrinsic::arm_neon_vst2,
383 Intrinsic::arm_neon_vst3,
384 Intrinsic::arm_neon_vst4};
385
386 static const Intrinsic::ID StoreLaneInts[] = {
387 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
388 Intrinsic::arm_neon_vst4lane
389 };
390
391 auto fArgs = F->getFunctionType()->params();
392 Type *Tys[] = {fArgs[0], fArgs[1]};
393 if (Name.find("lane") == StringRef::npos)
394 NewFn = Intrinsic::getDeclaration(F->getParent(),
395 StoreInts[fArgs.size() - 3], Tys);
396 else
397 NewFn = Intrinsic::getDeclaration(F->getParent(),
398 StoreLaneInts[fArgs.size() - 5], Tys);
399 return true;
400 }
401 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
402 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
403 return true;
404 }
405 break;
406 }
407
408 case 'c': {
409 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
410 rename(F);
411 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
412 F->arg_begin()->getType());
413 return true;
414 }
415 if (Name.startswith("cttz.") && F->arg_size() == 1) {
416 rename(F);
417 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
418 F->arg_begin()->getType());
419 return true;
420 }
421 break;
422 }
423 case 'i':
424 case 'l': {
425 bool IsLifetimeStart = Name.startswith("lifetime.start");
426 if (IsLifetimeStart || Name.startswith("invariant.start")) {
427 Intrinsic::ID ID = IsLifetimeStart ?
428 Intrinsic::lifetime_start : Intrinsic::invariant_start;
429 auto Args = F->getFunctionType()->params();
430 Type* ObjectPtr[1] = {Args[1]};
431 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
432 rename(F);
433 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
434 return true;
435 }
436 }
437
438 bool IsLifetimeEnd = Name.startswith("lifetime.end");
439 if (IsLifetimeEnd || Name.startswith("invariant.end")) {
440 Intrinsic::ID ID = IsLifetimeEnd ?
441 Intrinsic::lifetime_end : Intrinsic::invariant_end;
442
443 auto Args = F->getFunctionType()->params();
444 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
445 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
446 rename(F);
447 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
448 return true;
449 }
450 }
451 break;
452 }
453 case 'm': {
454 if (Name.startswith("masked.load.")) {
455 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
456 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
457 rename(F);
458 NewFn = Intrinsic::getDeclaration(F->getParent(),
459 Intrinsic::masked_load,
460 Tys);
461 return true;
462 }
463 }
464 if (Name.startswith("masked.store.")) {
465 auto Args = F->getFunctionType()->params();
466 Type *Tys[] = { Args[0], Args[1] };
467 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
468 rename(F);
469 NewFn = Intrinsic::getDeclaration(F->getParent(),
470 Intrinsic::masked_store,
471 Tys);
472 return true;
473 }
474 }
475 // Rename gather/scatter intrinsics that lack address space overloading
476 // to the new overload, which includes an address space.
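// e.g. llvm.masked.gather.v2f64 should become something like
// llvm.masked.gather.v2f64.v2p0f64.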
477 if (Name.startswith("masked.gather.")) {
478 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
479 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
480 rename(F);
481 NewFn = Intrinsic::getDeclaration(F->getParent(),
482 Intrinsic::masked_gather, Tys);
483 return true;
484 }
485 }
486 if (Name.startswith("masked.scatter.")) {
487 auto Args = F->getFunctionType()->params();
488 Type *Tys[] = {Args[0], Args[1]};
489 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
490 rename(F);
491 NewFn = Intrinsic::getDeclaration(F->getParent(),
492 Intrinsic::masked_scatter, Tys);
493 return true;
494 }
495 }
496 break;
497 }
498 case 'n': {
499 if (Name.startswith("nvvm.")) {
500 Name = Name.substr(5);
501
502 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
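// e.g. llvm.nvvm.brev32 maps to llvm.bitreverse.i32, llvm.nvvm.clz.i to
// llvm.ctlz.i32, and llvm.nvvm.popc.i to llvm.ctpop.i32.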
503 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
504 .Cases("brev32", "brev64", Intrinsic::bitreverse)
505 .Case("clz.i", Intrinsic::ctlz)
506 .Case("popc.i", Intrinsic::ctpop)
507 .Default(Intrinsic::not_intrinsic);
508 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
509 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
510 {F->getReturnType()});
511 return true;
512 }
513
514 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
515 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
516 //
517 // TODO: We could add lohi.i2d.
518 bool Expand = StringSwitch<bool>(Name)
519 .Cases("abs.i", "abs.ll", true)
520 .Cases("clz.ll", "popc.ll", "h2f", true)
521 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
522 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
523 .Default(false);
524 if (Expand) {
525 NewFn = nullptr;
526 return true;
527 }
528 }
529 break;
530 }
531 case 'o':
532 // We only need to change the name to match the mangling, which now
533 // includes the address space.
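// e.g. llvm.objectsize.i64 is renamed to llvm.objectsize.i64.p0i8.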
534 if (Name.startswith("objectsize.")) {
535 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
536 if (F->arg_size() == 2 ||
537 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
538 rename(F);
539 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
540 Tys);
541 return true;
542 }
543 }
544 break;
545
546 case 's':
547 if (Name == "stackprotectorcheck") {
548 NewFn = nullptr;
549 return true;
550 }
551 break;
552
553 case 'x':
554 if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
555 return true;
556 }
557 // Remangle our intrinsic since we upgrade the mangling
558 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
559 if (Result != None) {
560 NewFn = Result.getValue();
561 return true;
562 }
563
564 // This may not belong here. This function is effectively being overloaded
565 // to both detect an intrinsic which needs upgrading, and to provide the
566 // upgraded form of the intrinsic. We should perhaps have two separate
567 // functions for this.
568 return false;
569}
570
571bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
572 NewFn = nullptr;
573 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
574 assert(F != NewFn && "Intrinsic function upgraded to the same function");
575
576 // Upgrade intrinsic attributes. This does not change the function.
577 if (NewFn)
578 F = NewFn;
579 if (Intrinsic::ID id = F->getIntrinsicID())
580 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
581 return Upgraded;
582}
583
584bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
585 // Nothing to do yet.
586 return false;
587}
588
589// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
590// to byte shuffles.
591static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
592 Value *Op, unsigned Shift) {
593 Type *ResultTy = Op->getType();
594 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
595
596 // Bitcast from a 64-bit element type to a byte element type.
597 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
598 Op = Builder.CreateBitCast(Op, VecTy, "cast");
599
600 // We'll be shuffling in zeroes.
601 Value *Res = Constant::getNullValue(VecTy);
602
603 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
604 // we'll just return the zero vector.
605 if (Shift < 16) {
606 uint32_t Idxs[64];
607 // 256/512-bit version is split into 2/4 16-byte lanes.
608 for (unsigned l = 0; l != NumElts; l += 16)
609 for (unsigned i = 0; i != 16; ++i) {
610 unsigned Idx = NumElts + i - Shift;
611 if (Idx < NumElts)
612 Idx -= NumElts - 16; // end of lane, switch operand.
613 Idxs[l + i] = Idx + l;
614 }
615
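// For example, a 128-bit shift by 4 bytes gives Idxs = <12, 13, 14, 15,
// 16, ..., 27>: the first four bytes are zeros from Res and the rest are
// bytes 0-11 of Op.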
616 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
617 }
618
619 // Bitcast back to a 64-bit element type.
620 return Builder.CreateBitCast(Res, ResultTy, "cast");
621}
622
623// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
624// to byte shuffles.
625static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
626 unsigned Shift) {
627 Type *ResultTy = Op->getType();
628 unsigned NumElts = ResultTy->getVectorNumElements() * 8;
629
630 // Bitcast from a 64-bit element type to a byte element type.
631 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
632 Op = Builder.CreateBitCast(Op, VecTy, "cast");
633
634 // We'll be shuffling in zeroes.
635 Value *Res = Constant::getNullValue(VecTy);
636
637 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
638 // we'll just return the zero vector.
639 if (Shift < 16) {
640 uint32_t Idxs[64];
641 // 256/512-bit version is split into 2/4 16-byte lanes.
642 for (unsigned l = 0; l != NumElts; l += 16)
643 for (unsigned i = 0; i != 16; ++i) {
644 unsigned Idx = i + Shift;
645 if (Idx >= 16)
646 Idx += NumElts - 16; // end of lane, switch operand.
647 Idxs[l + i] = Idx + l;
648 }
649
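// For example, a 128-bit shift by 4 bytes gives Idxs = <4, 5, ..., 15,
// 16, 17, 18, 19>: bytes 4-15 of Op followed by four zeros from Res.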
650 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
651 }
652
653 // Bitcast back to a 64-bit element type.
654 return Builder.CreateBitCast(Res, ResultTy, "cast");
655}
656
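// Convert an integer mask operand to a vector of i1 by bitcasting and, for
// vectors with fewer than 8 elements, extracting the low bits. For example,
// an i8 mask paired with a 4-element operation becomes:
//   %vec = bitcast i8 %mask to <8 x i1>
//   %ext = shufflevector <8 x i1> %vec, <8 x i1> %vec,
//                        <4 x i32> <i32 0, i32 1, i32 2, i32 3>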
657static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
658 unsigned NumElts) {
659 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
660 cast<IntegerType>(Mask->getType())->getBitWidth());
661 Mask = Builder.CreateBitCast(Mask, MaskTy);
662
663 // If we have fewer than 8 elements, then the starting mask was an i8 and
664 // we need to extract down to the right number of elements.
665 if (NumElts < 8) {
666 uint32_t Indices[4];
667 for (unsigned i = 0; i != NumElts; ++i)
668 Indices[i] = i;
669 Mask = Builder.CreateShuffleVector(Mask, Mask,
670 makeArrayRef(Indices, NumElts),
671 "extract");
672 }
673
674 return Mask;
675}
676
677static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
678 Value *Op0, Value *Op1) {
679 // If the mask is all ones there is nothing to select; just return Op0.
680 if (const auto *C = dyn_cast<Constant>(Mask))
681 if (C->isAllOnesValue())
682 return Op0;
683
684 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
685 return Builder.CreateSelect(Mask, Op0, Op1);
686}
687
688// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
689 // PALIGNR handles large immediates by shifting, while VALIGN masks the
690 // immediate, so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
691static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
692 Value *Op1, Value *Shift,
693 Value *Passthru, Value *Mask,
694 bool IsVALIGN) {
695 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
696
697 unsigned NumElts = Op0->getType()->getVectorNumElements();
698 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
699 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
700 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
701
702 // Mask the immediate for VALIGN.
703 if (IsVALIGN)
704 ShiftVal &= (NumElts - 1);
705
706 // If palignr is shifting the pair of vectors more than the size of two
707 // lanes, emit zero.
708 if (ShiftVal >= 32)
709 return llvm::Constant::getNullValue(Op0->getType());
710
711 // If palignr is shifting the pair of input vectors more than one lane,
712 // but less than two lanes, convert to shifting in zeroes.
713 if (ShiftVal > 16) {
714 ShiftVal -= 16;
715 Op1 = Op0;
716 Op0 = llvm::Constant::getNullValue(Op0->getType());
717 }
718
719 uint32_t Indices[64];
720 // 256-bit palignr operates on 128-bit lanes so we need to handle that
721 for (unsigned l = 0; l < NumElts; l += 16) {
722 for (unsigned i = 0; i != 16; ++i) {
723 unsigned Idx = ShiftVal + i;
724 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
725 Idx += NumElts - 16; // End of lane, switch operand.
726 Indices[l + i] = Idx + l;
727 }
728 }
729
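// For example, a 128-bit PALIGNR with ShiftVal == 4 selects bytes 4-15 of
// Op1 followed by bytes 0-3 of Op0, i.e. Indices = <4, 5, ..., 15, 16, 17,
// 18, 19> into the (Op1, Op0) concatenation.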
730 Value *Align = Builder.CreateShuffleVector(Op1, Op0,
731 makeArrayRef(Indices, NumElts),
732 "palignr");
733
734 return EmitX86Select(Builder, Mask, Align, Passthru);
735}
736
737static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
738 Value *Ptr, Value *Data, Value *Mask,
739 bool Aligned) {
740 // Cast the pointer to the right type.
741 Ptr = Builder.CreateBitCast(Ptr,
742 llvm::PointerType::getUnqual(Data->getType()));
743 unsigned Align =
744 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
745
746 // If the mask is all ones just emit a regular store.
747 if (const auto *C = dyn_cast<Constant>(Mask))
748 if (C->isAllOnesValue())
749 return Builder.CreateAlignedStore(Data, Ptr, Align);
750
751 // Convert the mask from an integer type to a vector of i1.
752 unsigned NumElts = Data->getType()->getVectorNumElements();
753 Mask = getX86MaskVec(Builder, Mask, NumElts);
754 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
755}
756
757static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
758 Value *Ptr, Value *Passthru, Value *Mask,
759 bool Aligned) {
760 // Cast the pointer to the right type.
761 Ptr = Builder.CreateBitCast(Ptr,
762 llvm::PointerType::getUnqual(Passthru->getType()));
763 unsigned Align =
764 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
765
766 // If the mask is all ones just emit a regular load.
767 if (const auto *C = dyn_cast<Constant>(Mask))
768 if (C->isAllOnesValue())
769 return Builder.CreateAlignedLoad(Ptr, Align);
770
771 // Convert the mask from an integer type to a vector of i1.
772 unsigned NumElts = Passthru->getType()->getVectorNumElements();
773 Mask = getX86MaskVec(Builder, Mask, NumElts);
774 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
775}
776
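// Upgrade the packed integer min/max intrinsics (pmaxs*/pmaxu*/pmins*/
// pminu*) to a compare+select idiom; the AVX512 masked forms carry two
// extra operands (passthru and mask) which are applied with EmitX86Select.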
777static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
778 ICmpInst::Predicate Pred) {
779 Value *Op0 = CI.getArgOperand(0);
780 Value *Op1 = CI.getArgOperand(1);
781 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
782 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
783
784 if (CI.getNumArgOperands() == 4)
785 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
786
787 return Res;
788}
789
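// Upgrade masked integer compare intrinsics. CC 3 and 7 are the always
// false/true conditions; the remaining codes map onto an icmp predicate.
// The compare result is ANDed with the mask and padded out to at least
// 8 bits so it can be bitcast to the i8/i16/... return type.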
790static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
791 unsigned CC, bool Signed) {
792 Value *Op0 = CI.getArgOperand(0);
793 unsigned NumElts = Op0->getType()->getVectorNumElements();
794
795 Value *Cmp;
796 if (CC == 3) {
797 Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
798 } else if (CC == 7) {
799 Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
800 } else {
801 ICmpInst::Predicate Pred;
802 switch (CC) {
803 default: llvm_unreachable("Unknown condition code");
804 case 0: Pred = ICmpInst::ICMP_EQ; break;
805 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
806 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
807 case 4: Pred = ICmpInst::ICMP_NE; break;
808 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
809 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
810 }
811 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
812 }
813
814 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
815 const auto *C = dyn_cast<Constant>(Mask);
816 if (!C || !C->isAllOnesValue())
817 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
818
819 if (NumElts < 8) {
820 uint32_t Indices[8];
821 for (unsigned i = 0; i != NumElts; ++i)
822 Indices[i] = i;
823 for (unsigned i = NumElts; i != 8; ++i)
824 Indices[i] = NumElts + i % NumElts;
825 Cmp = Builder.CreateShuffleVector(Cmp,
826 Constant::getNullValue(Cmp->getType()),
827 Indices);
828 }
829 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
830 std::max(NumElts, 8U)));
831}
832
833 // Replace a masked shift intrinsic with the older unmasked intrinsic followed by a select on the mask.
834static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
835 Intrinsic::ID IID) {
836 Function *F = CI.getCalledFunction();
837 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
838 Value *Rep = Builder.CreateCall(Intrin,
839 { CI.getArgOperand(0), CI.getArgOperand(1) });
840 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
841}
842
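// Upgrade masked scalar move intrinsics (e.g. avx512.mask.move.ss): bit 0
// of the mask selects between element 0 of the second operand and element 0
// of the passthru, and the result is inserted into element 0 of the first
// operand.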
843static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
844 Value* A = CI.getArgOperand(0);
845 Value* B = CI.getArgOperand(1);
846 Value* Src = CI.getArgOperand(2);
847 Value* Mask = CI.getArgOperand(3);
848
849 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
850 Value* Cmp = Builder.CreateIsNotNull(AndNode);
851 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
852 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
853 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
854 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
855}
856
857
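// Upgrade vpmovm2* style mask-to-vector intrinsics: widen the integer mask
// to a vector of i1 and sign-extend it to the vector return type.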
858static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
859 Value* Op = CI.getArgOperand(0);
860 Type* ReturnOp = CI.getType();
861 unsigned NumElts = CI.getType()->getVectorNumElements();
862 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
863 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
864}
865
866/// Upgrade a call to an old intrinsic. All argument and return casting must be
867/// provided to seamlessly integrate with existing context.
868void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
869 Function *F = CI->getCalledFunction();
870 LLVMContext &C = CI->getContext();
871 IRBuilder<> Builder(C);
872 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
873
874 assert(F && "Intrinsic call is not direct?");
875
876 if (!NewFn) {
877 // Get the Function's name.
878 StringRef Name = F->getName();
879
880 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
881 Name = Name.substr(5);
882
883 bool IsX86 = Name.startswith("x86.");
884 if (IsX86)
885 Name = Name.substr(4);
886 bool IsNVVM = Name.startswith("nvvm.");
887 if (IsNVVM)
888 Name = Name.substr(5);
889
890 if (IsX86 && Name.startswith("sse4a.movnt.")) {
891 Module *M = F->getParent();
892 SmallVector<Metadata *, 1> Elts;
893 Elts.push_back(
894 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
895 MDNode *Node = MDNode::get(C, Elts);
896
897 Value *Arg0 = CI->getArgOperand(0);
898 Value *Arg1 = CI->getArgOperand(1);
899
900 // Nontemporal (unaligned) store of the 0th element of the float/double
901 // vector.
902 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
903 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
904 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
905 Value *Extract =
906 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
907
908 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
909 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
910
911 // Remove intrinsic.
912 CI->eraseFromParent();
913 return;
914 }
915
916 if (IsX86 && (Name.startswith("avx.movnt.") ||
917 Name.startswith("avx512.storent."))) {
918 Module *M = F->getParent();
919 SmallVector<Metadata *, 1> Elts;
920 Elts.push_back(
921 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
922 MDNode *Node = MDNode::get(C, Elts);
923
924 Value *Arg0 = CI->getArgOperand(0);
925 Value *Arg1 = CI->getArgOperand(1);
926
927 // Convert the type of the pointer to a pointer to the stored type.
928 Value *BC = Builder.CreateBitCast(Arg0,
929 PointerType::getUnqual(Arg1->getType()),
930 "cast");
931 VectorType *VTy = cast<VectorType>(Arg1->getType());
932 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
933 VTy->getBitWidth() / 8);
934 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
935
936 // Remove intrinsic.
937 CI->eraseFromParent();
938 return;
939 }
940
941 if (IsX86 && Name == "sse2.storel.dq") {
942 Value *Arg0 = CI->getArgOperand(0);
943 Value *Arg1 = CI->getArgOperand(1);
944
945 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
946 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
947 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
948 Value *BC = Builder.CreateBitCast(Arg0,
949 PointerType::getUnqual(Elt->getType()),
950 "cast");
951 Builder.CreateAlignedStore(Elt, BC, 1);
952
953 // Remove intrinsic.
954 CI->eraseFromParent();
955 return;
956 }
957
958 if (IsX86 && (Name.startswith("sse.storeu.") ||
959 Name.startswith("sse2.storeu.") ||
960 Name.startswith("avx.storeu."))) {
961 Value *Arg0 = CI->getArgOperand(0);
962 Value *Arg1 = CI->getArgOperand(1);
963
964 Arg0 = Builder.CreateBitCast(Arg0,
965 PointerType::getUnqual(Arg1->getType()),
966 "cast");
967 Builder.CreateAlignedStore(Arg1, Arg0, 1);
968
969 // Remove intrinsic.
970 CI->eraseFromParent();
971 return;
972 }
973
974 if (IsX86 && (Name.startswith("avx512.mask.store"))) {
975 // "avx512.mask.storeu." or "avx512.mask.store."
976 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
977 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
978 CI->getArgOperand(2), Aligned);
979
980 // Remove intrinsic.
981 CI->eraseFromParent();
982 return;
983 }
984
985 Value *Rep;
986 // Upgrade packed integer vector compare intrinsics to compare instructions.
987 if (IsX86 && (Name.startswith("sse2.pcmp") ||
988 Name.startswith("avx2.pcmp"))) {
989 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
990 bool CmpEq = Name[9] == 'e';
991 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
992 CI->getArgOperand(0), CI->getArgOperand(1));
993 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
994 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
995 Type *I32Ty = Type::getInt32Ty(C);
996 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
997 ConstantInt::get(I32Ty, 0));
998 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
999 ConstantInt::get(I32Ty, 0));
1000 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1001 Builder.CreateFAdd(Elt0, Elt1),
1002 ConstantInt::get(I32Ty, 0));
1003 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
1004 Type *I32Ty = Type::getInt32Ty(C);
1005 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1006 ConstantInt::get(I32Ty, 0));
1007 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1008 ConstantInt::get(I32Ty, 0));
1009 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1010 Builder.CreateFSub(Elt0, Elt1),
1011 ConstantInt::get(I32Ty, 0));
1012 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
1013 Type *I32Ty = Type::getInt32Ty(C);
1014 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1015 ConstantInt::get(I32Ty, 0));
1016 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1017 ConstantInt::get(I32Ty, 0));
1018 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1019 Builder.CreateFMul(Elt0, Elt1),
1020 ConstantInt::get(I32Ty, 0));
1021 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1022 Type *I32Ty = Type::getInt32Ty(C);
1023 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1024 ConstantInt::get(I32Ty, 0));
1025 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1026 ConstantInt::get(I32Ty, 0));
1027 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1028 Builder.CreateFDiv(Elt0, Elt1),
1029 ConstantInt::get(I32Ty, 0));
1030 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1031 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1032 bool CmpEq = Name[16] == 'e';
1033 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1034 } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
1035 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1036 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1037 } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
1038 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1039 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1040 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1041 Name == "sse2.pmaxs.w" ||
1042 Name == "sse41.pmaxsd" ||
1043 Name.startswith("avx2.pmaxs") ||
1044 Name.startswith("avx512.mask.pmaxs"))) {
1045 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1046 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1047 Name == "sse41.pmaxuw" ||
1048 Name == "sse41.pmaxud" ||
1049 Name.startswith("avx2.pmaxu") ||
1050 Name.startswith("avx512.mask.pmaxu"))) {
1051 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1052 } else if (IsX86 && (Name == "sse41.pminsb" ||
1053 Name == "sse2.pmins.w" ||
1054 Name == "sse41.pminsd" ||
1055 Name.startswith("avx2.pmins") ||
1056 Name.startswith("avx512.mask.pmins"))) {
1057 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1058 } else if (IsX86 && (Name == "sse2.pminu.b" ||
1059 Name == "sse41.pminuw" ||
1060 Name == "sse41.pminud" ||
1061 Name.startswith("avx2.pminu") ||
1062 Name.startswith("avx512.mask.pminu"))) {
1063 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1064 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1065 Name == "sse2.cvtps2pd" ||
1066 Name == "avx.cvtdq2.pd.256" ||
1067 Name == "avx.cvt.ps2.pd.256" ||
1068 Name.startswith("avx512.mask.cvtdq2pd.") ||
1069 Name.startswith("avx512.mask.cvtudq2pd."))) {
1070 // Lossless i32/float to double conversion.
1071 // Extract the bottom elements if necessary and convert to double vector.
1072 Value *Src = CI->getArgOperand(0);
1073 VectorType *SrcTy = cast<VectorType>(Src->getType());
1074 VectorType *DstTy = cast<VectorType>(CI->getType());
1075 Rep = CI->getArgOperand(0);
1076
1077 unsigned NumDstElts = DstTy->getNumElements();
1078 if (NumDstElts < SrcTy->getNumElements()) {
1079 assert(NumDstElts == 2 && "Unexpected vector size");
1080 uint32_t ShuffleMask[2] = { 0, 1 };
1081 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
1082 ShuffleMask);
1083 }
1084
1085 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
1086 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
1087 if (SInt2Double)
1088 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1089 else if (UInt2Double)
1090 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1091 else
1092 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1093
1094 if (CI->getNumArgOperands() == 3)
1095 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1096 CI->getArgOperand(1));
1097 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1098 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1099 CI->getArgOperand(1), CI->getArgOperand(2),
1100 /*Aligned*/false);
1101 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1102 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1103 CI->getArgOperand(1),CI->getArgOperand(2),
1104 /*Aligned*/true);
1105 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1106 Intrinsic::ID intID;
1107 if (Name.endswith("ub"))
1108 intID = Intrinsic::x86_xop_vpcomub;
1109 else if (Name.endswith("uw"))
1110 intID = Intrinsic::x86_xop_vpcomuw;
1111 else if (Name.endswith("ud"))
1112 intID = Intrinsic::x86_xop_vpcomud;
1113 else if (Name.endswith("uq"))
1114 intID = Intrinsic::x86_xop_vpcomuq;
1115 else if (Name.endswith("b"))
1116 intID = Intrinsic::x86_xop_vpcomb;
1117 else if (Name.endswith("w"))
1118 intID = Intrinsic::x86_xop_vpcomw;
1119 else if (Name.endswith("d"))
1120 intID = Intrinsic::x86_xop_vpcomd;
1121 else if (Name.endswith("q"))
1122 intID = Intrinsic::x86_xop_vpcomq;
1123 else
1124 llvm_unreachable("Unknown suffix");
1125
1126 Name = Name.substr(9); // strip off "xop.vpcom"
1127 unsigned Imm;
1128 if (Name.startswith("lt"))
1129 Imm = 0;
1130 else if (Name.startswith("le"))
1131 Imm = 1;
1132 else if (Name.startswith("gt"))
1133 Imm = 2;
1134 else if (Name.startswith("ge"))
1135 Imm = 3;
1136 else if (Name.startswith("eq"))
1137 Imm = 4;
1138 else if (Name.startswith("ne"))
1139 Imm = 5;
1140 else if (Name.startswith("false"))
1141 Imm = 6;
1142 else if (Name.startswith("true"))
1143 Imm = 7;
1144 else
1145 llvm_unreachable("Unknown condition");
1146
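// e.g. llvm.x86.xop.vpcomltb becomes a call to llvm.x86.xop.vpcomb with an
// immediate of 0.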
1147 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
1148 Rep =
1149 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
1150 Builder.getInt8(Imm)});
1151 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1152 Value *Sel = CI->getArgOperand(2);
1153 Value *NotSel = Builder.CreateNot(Sel);
1154 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1155 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1156 Rep = Builder.CreateOr(Sel0, Sel1);
1157 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1158 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1159 Intrinsic::x86_sse42_crc32_32_8);
1160 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1161 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1162 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1163 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
1164 // Replace broadcasts with a series of insertelements.
1165 Type *VecTy = CI->getType();
1166 Type *EltTy = VecTy->getVectorElementType();
1167 unsigned EltNum = VecTy->getVectorNumElements();
1168 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1169 EltTy->getPointerTo());
1170 Value *Load = Builder.CreateLoad(EltTy, Cast);
1171 Type *I32Ty = Type::getInt32Ty(C);
1172 Rep = UndefValue::get(VecTy);
1173 for (unsigned I = 0; I < EltNum; ++I)
1174 Rep = Builder.CreateInsertElement(Rep, Load,
1175 ConstantInt::get(I32Ty, I));
1176 } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
1177 Name.startswith("sse41.pmovzx") ||
1178 Name.startswith("avx2.pmovsx") ||
1179 Name.startswith("avx2.pmovzx") ||
1180 Name.startswith("avx512.mask.pmovsx") ||
1181 Name.startswith("avx512.mask.pmovzx"))) {
1182 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
1183 VectorType *DstTy = cast<VectorType>(CI->getType());
1184 unsigned NumDstElts = DstTy->getNumElements();
1185
1186 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
1187 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
1188 for (unsigned i = 0; i != NumDstElts; ++i)
1189 ShuffleMask[i] = i;
1190
1191 Value *SV = Builder.CreateShuffleVector(
1192 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
1193
1194 bool DoSext = (StringRef::npos != Name.find("pmovsx"));
1195 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
1196 : Builder.CreateZExt(SV, DstTy);
1197 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1198 if (CI->getNumArgOperands() == 3)
1199 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1200 CI->getArgOperand(1));
1201 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1202 Name == "avx2.vbroadcasti128")) {
1203 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1204 Type *EltTy = CI->getType()->getVectorElementType();
1205 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1206 Type *VT = VectorType::get(EltTy, NumSrcElts);
1207 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1208 PointerType::getUnqual(VT));
1209 Value *Load = Builder.CreateAlignedLoad(Op, 1);
1210 if (NumSrcElts == 2)
1211 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1212 { 0, 1, 0, 1 });
1213 else
1214 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1215 { 0, 1, 2, 3, 0, 1, 2, 3 });
1216 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
1217 Name.startswith("avx2.vbroadcast") ||
1218 Name.startswith("avx512.pbroadcast") ||
1219 Name.startswith("avx512.mask.broadcast.s"))) {
1220 // Replace vp?broadcasts with a vector shuffle.
1221 Value *Op = CI->getArgOperand(0);
1222 unsigned NumElts = CI->getType()->getVectorNumElements();
1223 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
1224 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
1225 Constant::getNullValue(MaskTy));
1226
1227 if (CI->getNumArgOperands() == 3)
1228 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1229 CI->getArgOperand(1));
1230 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1231 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1232 CI->getArgOperand(1),
1233 CI->getArgOperand(2),
1234 CI->getArgOperand(3),
1235 CI->getArgOperand(4),
1236 false);
1237 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
1238 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1239 CI->getArgOperand(1),
1240 CI->getArgOperand(2),
1241 CI->getArgOperand(3),
1242 CI->getArgOperand(4),
1243 true);
1244 } else if (IsX86 && (Name == "sse2.psll.dq" ||
1245 Name == "avx2.psll.dq")) {
1246 // 128/256-bit shift left specified in bits.
1247 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1248 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
1249 Shift / 8); // Shift is in bits.
1250 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
1251 Name == "avx2.psrl.dq")) {
1252 // 128/256-bit shift right specified in bits.
1253 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1254 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
1255 Shift / 8); // Shift is in bits.
1256 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
1257 Name == "avx2.psll.dq.bs" ||
1258 Name == "avx512.psll.dq.512")) {
1259 // 128/256/512-bit shift left specified in bytes.
1260 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1261 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1262 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
1263 Name == "avx2.psrl.dq.bs" ||
1264 Name == "avx512.psrl.dq.512")) {
1265 // 128/256/512-bit shift right specified in bytes.
1266 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1267 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1268 } else if (IsX86 && (Name == "sse41.pblendw" ||
1269 Name.startswith("sse41.blendp") ||
1270 Name.startswith("avx.blend.p") ||
1271 Name == "avx2.pblendw" ||
1272 Name.startswith("avx2.pblendd."))) {
1273 Value *Op0 = CI->getArgOperand(0);
1274 Value *Op1 = CI->getArgOperand(1);
1275 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1276 VectorType *VecTy = cast<VectorType>(CI->getType());
1277 unsigned NumElts = VecTy->getNumElements();
1278
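// Bit i of the immediate selects between Op0 (clear) and Op1 (set); e.g.
// Imm == 0x0F on a v8i16 blend gives Idxs = <8, 9, 10, 11, 4, 5, 6, 7>.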
1279 SmallVector<uint32_t, 16> Idxs(NumElts);
1280 for (unsigned i = 0; i != NumElts; ++i)
1281 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1282
1283 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1284 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1285 Name == "avx2.vinserti128" ||
1286 Name.startswith("avx512.mask.insert"))) {
1287 Value *Op0 = CI->getArgOperand(0);
1288 Value *Op1 = CI->getArgOperand(1);
1289 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1290 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1291 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1292 unsigned Scale = DstNumElts / SrcNumElts;
1293
1294 // Mask off the high bits of the immediate value; hardware ignores those.
1295 Imm = Imm % Scale;
1296
1297 // Extend the second operand into a vector the size of the destination.
1298 Value *UndefV = UndefValue::get(Op1->getType());
1299 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1300 for (unsigned i = 0; i != SrcNumElts; ++i)
1301 Idxs[i] = i;
1302 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1303 Idxs[i] = SrcNumElts;
1304 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1305
1306 // Insert the second operand into the first operand.
1307
1308 // Note that there is no guarantee that instruction lowering will actually
1309 // produce a vinsertf128 instruction for the created shuffles. In
1310 // particular, the 0 immediate case involves no lane changes, so it can
1311 // be handled as a blend.
1312
1313 // Example of shuffle mask for 32-bit elements:
1314 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1315 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
1316
1317 // First fill with the identity mask.
1318 for (unsigned i = 0; i != DstNumElts; ++i)
1319 Idxs[i] = i;
1320 // Then replace the elements where we need to insert.
1321 for (unsigned i = 0; i != SrcNumElts; ++i)
1322 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1323 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1324
1325 // If the intrinsic has a mask operand, handle that.
1326 if (CI->getNumArgOperands() == 5)
1327 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1328 CI->getArgOperand(3));
1329 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1330 Name == "avx2.vextracti128" ||
1331 Name.startswith("avx512.mask.vextract"))) {
1332 Value *Op0 = CI->getArgOperand(0);
1333 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1334 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1335 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1336 unsigned Scale = SrcNumElts / DstNumElts;
1337
1338 // Mask off the high bits of the immediate value; hardware ignores those.
1339 Imm = Imm % Scale;
1340
1341 // Get indexes for the subvector of the input vector.
1342 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1343 for (unsigned i = 0; i != DstNumElts; ++i) {
1344 Idxs[i] = i + (Imm * DstNumElts);
1345 }
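// e.g. extracting the upper half of a v8f32 (Imm == 1) gives
// Idxs = <4, 5, 6, 7>.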
1346 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1347
1348 // If the intrinsic has a mask operand, handle that.
1349 if (CI->getNumArgOperands() == 4)
1350 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1351 CI->getArgOperand(2));
1352 } else if (!IsX86 && Name == "stackprotectorcheck") {
1353 Rep = nullptr;
1354 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
1355 Name.startswith("avx512.mask.perm.di."))) {
1356 Value *Op0 = CI->getArgOperand(0);
1357 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1358 VectorType *VecTy = cast<VectorType>(CI->getType());
1359 unsigned NumElts = VecTy->getNumElements();
1360
1361 SmallVector<uint32_t, 8> Idxs(NumElts);
1362 for (unsigned i = 0; i != NumElts; ++i)
1363 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1364
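// e.g. Imm == 0x1B reverses each group of four elements: on v8f64 this
// gives Idxs = <3, 2, 1, 0, 7, 6, 5, 4>.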
1365 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1366
1367 if (CI->getNumArgOperands() == 4)
1368 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1369 CI->getArgOperand(2));
1370 } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
1371 Name == "sse2.pshuf.d" ||
1372 Name.startswith("avx512.mask.vpermil.p") ||
1373 Name.startswith("avx512.mask.pshuf.d."))) {
1374 Value *Op0 = CI->getArgOperand(0);
1375 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1376 VectorType *VecTy = cast<VectorType>(CI->getType());
1377 unsigned NumElts = VecTy->getNumElements();
1378 // Calculate the size of each index in the immediate.
1379 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
1380 unsigned IdxMask = ((1 << IdxSize) - 1);
1381
1382 SmallVector<uint32_t, 8> Idxs(NumElts);
1383 // Look up the bits for this element, wrapping around the immediate every
1384 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
1385 // to offset by the first index of each group.
1386 for (unsigned i = 0; i != NumElts; ++i)
1387 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
1388
1389 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1390
1391 if (CI->getNumArgOperands() == 4)
1392 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1393 CI->getArgOperand(2));
1394 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
1395 Name.startswith("avx512.mask.pshufl.w."))) {
1396 Value *Op0 = CI->getArgOperand(0);
1397 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1398 unsigned NumElts = CI->getType()->getVectorNumElements();
1399
1400 SmallVector<uint32_t, 16> Idxs(NumElts);
1401 for (unsigned l = 0; l != NumElts; l += 8) {
1402 for (unsigned i = 0; i != 4; ++i)
1403 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
1404 for (unsigned i = 4; i != 8; ++i)
1405 Idxs[i + l] = i + l;
1406 }
1407
1408 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1409
1410 if (CI->getNumArgOperands() == 4)
1411 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1412 CI->getArgOperand(2));
1413 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
1414 Name.startswith("avx512.mask.pshufh.w."))) {
1415 Value *Op0 = CI->getArgOperand(0);
1416 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1417 unsigned NumElts = CI->getType()->getVectorNumElements();
1418
1419 SmallVector<uint32_t, 16> Idxs(NumElts);
1420 for (unsigned l = 0; l != NumElts; l += 8) {
1421 for (unsigned i = 0; i != 4; ++i)
1422 Idxs[i + l] = i + l;
1423 for (unsigned i = 0; i != 4; ++i)
1424 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
1425 }
1426
1427 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1428
1429 if (CI->getNumArgOperands() == 4)
1430 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1431 CI->getArgOperand(2));
1432 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
1433 Value *Op0 = CI->getArgOperand(0);
1434 Value *Op1 = CI->getArgOperand(1);
1435 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1436 unsigned NumElts = CI->getType()->getVectorNumElements();
1437
1438 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1439 unsigned HalfLaneElts = NumLaneElts / 2;
1440
1441 SmallVector<uint32_t, 16> Idxs(NumElts);
1442 for (unsigned i = 0; i != NumElts; ++i) {
1443 // Base index is the starting element of the lane.
1444 Idxs[i] = i - (i % NumLaneElts);
1445 // If we are halfway through the lane, switch to the other source.
1446 if ((i % NumLaneElts) >= HalfLaneElts)
1447 Idxs[i] += NumElts;
1448 // Now select the specific element by adding HalfLaneElts bits from
1449 // the immediate, wrapping around the immediate every 8 bits.
1450 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
1451 }
1452
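// e.g. a v4f32 shufps with Imm == 0x4E gives Idxs = <2, 3, 4, 5>: the low
// half of the result comes from Op0 and the high half from Op1.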
1453 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1454
1455 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1456 CI->getArgOperand(3));
1457 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
1458 Name.startswith("avx512.mask.movshdup") ||
1459 Name.startswith("avx512.mask.movsldup"))) {
1460 Value *Op0 = CI->getArgOperand(0);
1461 unsigned NumElts = CI->getType()->getVectorNumElements();
1462 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1463
1464 unsigned Offset = 0;
1465 if (Name.startswith("avx512.mask.movshdup."))
1466 Offset = 1;
1467
1468 SmallVector<uint32_t, 16> Idxs(NumElts);
1469 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1470 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1471 Idxs[i + l + 0] = i + l + Offset;
1472 Idxs[i + l + 1] = i + l + Offset;
1473 }
1474
1475 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1476
1477 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1478 CI->getArgOperand(1));
1479 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
1480 Name.startswith("avx512.mask.unpckl."))) {
1481 Value *Op0 = CI->getArgOperand(0);
1482 Value *Op1 = CI->getArgOperand(1);
1483 int NumElts = CI->getType()->getVectorNumElements();
1484 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1485
1486 SmallVector<uint32_t, 64> Idxs(NumElts);
1487 for (int l = 0; l != NumElts; l += NumLaneElts)
1488 for (int i = 0; i != NumLaneElts; ++i)
1489 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
1490
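// e.g. for v2i64 this is the unpacklo pattern Idxs = <0, 2>; the unpckh
// variant below starts at NumLaneElts / 2 to take the high half of each
// lane instead.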
1491 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1492
1493 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1494 CI->getArgOperand(2));
1495 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
1496 Name.startswith("avx512.mask.unpckh."))) {
1497 Value *Op0 = CI->getArgOperand(0);
1498 Value *Op1 = CI->getArgOperand(1);
1499 int NumElts = CI->getType()->getVectorNumElements();
1500 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1501
1502 SmallVector<uint32_t, 64> Idxs(NumElts);
1503 for (int l = 0; l != NumElts; l += NumLaneElts)
1504 for (int i = 0; i != NumLaneElts; ++i)
1505 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
1506
1507 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1508
1509 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1510 CI->getArgOperand(2));
1511 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
1512 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
1513 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1514 CI->getArgOperand(2));
1515 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1516 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1517 CI->getArgOperand(1));
1518 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1519 CI->getArgOperand(2));
1520 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1521 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1522 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1523 CI->getArgOperand(2));
1524 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1525 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1526 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1527 CI->getArgOperand(2));
1528 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1529 VectorType *FTy = cast<VectorType>(CI->getType());
1530 VectorType *ITy = VectorType::getInteger(FTy);
1531 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1532 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1533 Rep = Builder.CreateBitCast(Rep, FTy);
1534 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1535 CI->getArgOperand(2));
1536 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
1537 VectorType *FTy = cast<VectorType>(CI->getType());
1538 VectorType *ITy = VectorType::getInteger(FTy);
1539 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1540 Rep = Builder.CreateAnd(Rep,
1541 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1542 Rep = Builder.CreateBitCast(Rep, FTy);
1543 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1544 CI->getArgOperand(2));
1545 } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
1546 VectorType *FTy = cast<VectorType>(CI->getType());
1547 VectorType *ITy = VectorType::getInteger(FTy);
1548 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1549 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1550 Rep = Builder.CreateBitCast(Rep, FTy);
1551 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1552 CI->getArgOperand(2));
1553 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
1554 VectorType *FTy = cast<VectorType>(CI->getType());
1555 VectorType *ITy = VectorType::getInteger(FTy);
1556 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1557 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1558 Rep = Builder.CreateBitCast(Rep, FTy);
1559 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1560 CI->getArgOperand(2));
1561 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
1562 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1563 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1564 CI->getArgOperand(2));
1565 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
1566 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
1567 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1568 CI->getArgOperand(2));
1569 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1570 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1571 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1572 CI->getArgOperand(2));
1573 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
1574 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
1575 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1576 CI->getArgOperand(2));
1577 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1578 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
1579 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1580 CI->getArgOperand(2));
1581 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1582 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
1583 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1584 CI->getArgOperand(2));
1585 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1586 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
1587 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1588 CI->getArgOperand(2));
1589 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
1590 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1591 Intrinsic::ctlz,
1592 CI->getType()),
1593 { CI->getArgOperand(0), Builder.getInt1(false) });
1594 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1595 CI->getArgOperand(1));
1596 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
1597 Name.startswith("avx512.mask.min.p"))) {
1598 bool IsMin = Name[13] == 'i';
1599 VectorType *VecTy = cast<VectorType>(CI->getType());
1600 unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
1601 unsigned EltWidth = VecTy->getScalarSizeInBits();
1602 Intrinsic::ID IID;
1603 if (!IsMin && VecWidth == 128 && EltWidth == 32)
1604 IID = Intrinsic::x86_sse_max_ps;
1605 else if (!IsMin && VecWidth == 128 && EltWidth == 64)
1606 IID = Intrinsic::x86_sse2_max_pd;
1607 else if (!IsMin && VecWidth == 256 && EltWidth == 32)
1608 IID = Intrinsic::x86_avx_max_ps_256;
1609 else if (!IsMin && VecWidth == 256 && EltWidth == 64)
1610 IID = Intrinsic::x86_avx_max_pd_256;
1611 else if (IsMin && VecWidth == 128 && EltWidth == 32)
1612 IID = Intrinsic::x86_sse_min_ps;
1613 else if (IsMin && VecWidth == 128 && EltWidth == 64)
1614 IID = Intrinsic::x86_sse2_min_pd;
1615 else if (IsMin && VecWidth == 256 && EltWidth == 32)
1616 IID = Intrinsic::x86_avx_min_ps_256;
1617 else if (IsMin && VecWidth == 256 && EltWidth == 64)
1618 IID = Intrinsic::x86_avx_min_pd_256;
1619 else
1620 llvm_unreachable("Unexpected intrinsic");
1621
1622 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1623 { CI->getArgOperand(0), CI->getArgOperand(1) });
1624 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1625 CI->getArgOperand(2));
1626 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1627 VectorType *VecTy = cast<VectorType>(CI->getType());
1628 Intrinsic::ID IID;
1629 if (VecTy->getPrimitiveSizeInBits() == 128)
1630 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1631 else if (VecTy->getPrimitiveSizeInBits() == 256)
1632 IID = Intrinsic::x86_avx2_pshuf_b;
1633 else if (VecTy->getPrimitiveSizeInBits() == 512)
1634 IID = Intrinsic::x86_avx512_pshuf_b_512;
1635 else
1636 llvm_unreachable("Unexpected intrinsic");
1637
1638 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1639 { CI->getArgOperand(0), CI->getArgOperand(1) });
1640 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1641 CI->getArgOperand(2));
1642 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1643 Name.startswith("avx512.mask.pmulu.dq."))) {
1644 bool IsUnsigned = Name[16] == 'u';
1645 VectorType *VecTy = cast<VectorType>(CI->getType());
1646 Intrinsic::ID IID;
1647 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1648 IID = Intrinsic::x86_sse41_pmuldq;
1649 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1650 IID = Intrinsic::x86_avx2_pmul_dq;
1651 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1652 IID = Intrinsic::x86_avx512_pmul_dq_512;
1653 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1654 IID = Intrinsic::x86_sse2_pmulu_dq;
1655 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1656 IID = Intrinsic::x86_avx2_pmulu_dq;
1657 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1658 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1659 else
1660 llvm_unreachable("Unexpected intrinsic");
1661
1662 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1663 { CI->getArgOperand(0), CI->getArgOperand(1) });
1664 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1665 CI->getArgOperand(2));
1666 } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
1667 bool IsUnsigned = Name[16] == 'u';
1668 bool IsDW = Name[18] == 'd';
1669 VectorType *VecTy = cast<VectorType>(CI->getType());
1670 Intrinsic::ID IID;
1671 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1672 IID = Intrinsic::x86_sse2_packsswb_128;
1673 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1674 IID = Intrinsic::x86_avx2_packsswb;
1675 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1676 IID = Intrinsic::x86_avx512_packsswb_512;
1677 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1678 IID = Intrinsic::x86_sse2_packssdw_128;
1679 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1680 IID = Intrinsic::x86_avx2_packssdw;
1681 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1682 IID = Intrinsic::x86_avx512_packssdw_512;
1683 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1684 IID = Intrinsic::x86_sse2_packuswb_128;
1685 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1686 IID = Intrinsic::x86_avx2_packuswb;
1687 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1688 IID = Intrinsic::x86_avx512_packuswb_512;
1689 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1690 IID = Intrinsic::x86_sse41_packusdw;
1691 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1692 IID = Intrinsic::x86_avx2_packusdw;
1693 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1694 IID = Intrinsic::x86_avx512_packusdw_512;
1695 else
1696 llvm_unreachable("Unexpected intrinsic");
1697
1698 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1699 { CI->getArgOperand(0), CI->getArgOperand(1) });
1700 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1701 CI->getArgOperand(2));
1702 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
1703 bool IsImmediate = Name[16] == 'i' ||
1704 (Name.size() > 18 && Name[18] == 'i');
1705 bool IsVariable = Name[16] == 'v';
1706 char Size = Name[16] == '.' ? Name[17] :
1707 Name[17] == '.' ? Name[18] :
1708 Name[18] == '.' ? Name[19] :
1709 Name[20];
1710
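// e.g. for "avx512.mask.psll.di.512": Name[16] == '.', so Size is
// Name[17] == 'd', and Name[18] == 'i' marks the immediate form.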
1711 Intrinsic::ID IID;
1712 if (IsVariable && Name[17] != '.') {
1713 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
1714 IID = Intrinsic::x86_avx2_psllv_q;
1715 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
1716 IID = Intrinsic::x86_avx2_psllv_q_256;
1717 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
1718 IID = Intrinsic::x86_avx2_psllv_d;
1719 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
1720 IID = Intrinsic::x86_avx2_psllv_d_256;
1721 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
1722 IID = Intrinsic::x86_avx512_psllv_w_128;
1723 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
1724 IID = Intrinsic::x86_avx512_psllv_w_256;
1725 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
1726 IID = Intrinsic::x86_avx512_psllv_w_512;
1727 else
1728 llvm_unreachable("Unexpected size");
1729 } else if (Name.endswith(".128")) {
1730 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
1731 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
1732 : Intrinsic::x86_sse2_psll_d;
1733 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
1734 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
1735 : Intrinsic::x86_sse2_psll_q;
1736 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
1737 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
1738 : Intrinsic::x86_sse2_psll_w;
1739 else
1740 llvm_unreachable("Unexpected size");
1741 } else if (Name.endswith(".256")) {
1742 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
1743 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
1744 : Intrinsic::x86_avx2_psll_d;
1745 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
1746 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
1747 : Intrinsic::x86_avx2_psll_q;
1748 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
1749 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
1750 : Intrinsic::x86_avx2_psll_w;
1751 else
1752 llvm_unreachable("Unexpected size");
1753 } else {
1754 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
1755 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
1756 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
1757 Intrinsic::x86_avx512_psll_d_512;
1758 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
1759 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
1760 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
1761 Intrinsic::x86_avx512_psll_q_512;
1762 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
1763 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
1764 : Intrinsic::x86_avx512_psll_w_512;
1765 else
1766 llvm_unreachable("Unexpected size");
1767 }
1768
1769 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1770 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
1771 bool IsImmediate = Name[16] == 'i' ||
1772 (Name.size() > 18 && Name[18] == 'i');
1773 bool IsVariable = Name[16] == 'v';
1774 char Size = Name[16] == '.' ? Name[17] :
1775 Name[17] == '.' ? Name[18] :
1776 Name[18] == '.' ? Name[19] :
1777 Name[20];
1778
1779 Intrinsic::ID IID;
1780 if (IsVariable && Name[17] != '.') {
1781 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
1782 IID = Intrinsic::x86_avx2_psrlv_q;
1783 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
1784 IID = Intrinsic::x86_avx2_psrlv_q_256;
1785 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
1786 IID = Intrinsic::x86_avx2_psrlv_d;
1787 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
1788 IID = Intrinsic::x86_avx2_psrlv_d_256;
1789 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
1790 IID = Intrinsic::x86_avx512_psrlv_w_128;
1791 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
1792 IID = Intrinsic::x86_avx512_psrlv_w_256;
1793 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
1794 IID = Intrinsic::x86_avx512_psrlv_w_512;
1795 else
1796 llvm_unreachable("Unexpected size");
1797 } else if (Name.endswith(".128")) {
1798 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
1799 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
1800 : Intrinsic::x86_sse2_psrl_d;
1801 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
1802 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
1803 : Intrinsic::x86_sse2_psrl_q;
1804 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
1805 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
1806 : Intrinsic::x86_sse2_psrl_w;
1807 else
1808 llvm_unreachable("Unexpected size");
1809 } else if (Name.endswith(".256")) {
1810 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
1811 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
1812 : Intrinsic::x86_avx2_psrl_d;
1813 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
1814 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
1815 : Intrinsic::x86_avx2_psrl_q;
1816 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
1817 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
1818 : Intrinsic::x86_avx2_psrl_w;
1819 else
1820 llvm_unreachable("Unexpected size");
1821 } else {
1822 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
1823 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
1824 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
1825 Intrinsic::x86_avx512_psrl_d_512;
1826 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
1827 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
1828 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
1829 Intrinsic::x86_avx512_psrl_q_512;
1830 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
1831 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
1832 : Intrinsic::x86_avx512_psrl_w_512;
1833 else
1834 llvm_unreachable("Unexpected size");
1835 }
1836
1837 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1838 } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
1839 bool IsImmediate = Name[16] == 'i' ||
1840 (Name.size() > 18 && Name[18] == 'i');
1841 bool IsVariable = Name[16] == 'v';
1842 char Size = Name[16] == '.' ? Name[17] :
1843 Name[17] == '.' ? Name[18] :
1844 Name[18] == '.' ? Name[19] :
1845 Name[20];
1846
1847 Intrinsic::ID IID;
1848 if (IsVariable && Name[17] != '.') {
1849 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
1850 IID = Intrinsic::x86_avx2_psrav_d;
1851 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
1852 IID = Intrinsic::x86_avx2_psrav_d_256;
1853 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
1854 IID = Intrinsic::x86_avx512_psrav_w_128;
1855 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
1856 IID = Intrinsic::x86_avx512_psrav_w_256;
1857 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
1858 IID = Intrinsic::x86_avx512_psrav_w_512;
1859 else
1860 llvm_unreachable("Unexpected size");
1861 } else if (Name.endswith(".128")) {
1862 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
1863 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
1864 : Intrinsic::x86_sse2_psra_d;
1865 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
1866 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
1867 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
1868 Intrinsic::x86_avx512_psra_q_128;
1869 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
1870 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
1871 : Intrinsic::x86_sse2_psra_w;
1872 else
1873 llvm_unreachable("Unexpected size");
1874 } else if (Name.endswith(".256")) {
1875 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
1876 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
1877 : Intrinsic::x86_avx2_psra_d;
1878 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
1879 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
1880 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
1881 Intrinsic::x86_avx512_psra_q_256;
1882 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
1883 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
1884 : Intrinsic::x86_avx2_psra_w;
1885 else
1886 llvm_unreachable("Unexpected size");
1887 } else {
1888 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
1889 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
1890 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
1891 Intrinsic::x86_avx512_psra_d_512;
1892 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
1893 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
1894 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
1895 Intrinsic::x86_avx512_psra_q_512;
1896 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
1897 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
1898 : Intrinsic::x86_avx512_psra_w_512;
1899 else
1900 llvm_unreachable("Unexpected size");
1901 }
1902
1903 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
1904 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
1905 Rep = upgradeMaskedMove(Builder, *CI);
1906 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
1907 Rep = UpgradeMaskToInt(Builder, *CI);
1908 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
1909 Intrinsic::ID IID;
1910 if (Name.endswith("ps.128"))
1911 IID = Intrinsic::x86_avx_vpermilvar_ps;
1912 else if (Name.endswith("pd.128"))
1913 IID = Intrinsic::x86_avx_vpermilvar_pd;
1914 else if (Name.endswith("ps.256"))
1915 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1916 else if (Name.endswith("pd.256"))
1917 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1918 else if (Name.endswith("ps.512"))
1919 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1920 else if (Name.endswith("pd.512"))
1921 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1922 else
1923 llvm_unreachable("Unexpected vpermilvar intrinsic");
1924
1925 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
1926 Rep = Builder.CreateCall(Intrin,
1927 { CI->getArgOperand(0), CI->getArgOperand(1) });
1928 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1929 CI->getArgOperand(2));
1930 } else if (IsX86 && Name.endswith(".movntdqa")) {
1931 Module *M = F->getParent();
1932 MDNode *Node = MDNode::get(
1933 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1934
1935 Value *Ptr = CI->getArgOperand(0);
1936 VectorType *VTy = cast<VectorType>(CI->getType());
1937
1938 // Convert the type of the pointer to a pointer to the stored type.
1939 Value *BC =
1940 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
1941 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
1942 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
1943 Rep = LI;
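// For illustration (assuming the classic i8* signature of the old
// intrinsic), a call such as
//   %v = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %p)
// becomes an ordinary load tagged as nontemporal:
//   %cast = bitcast i8* %p to <2 x i64>*
//   %v = load <2 x i64>, <2 x i64>* %cast, align 16, !nontemporal !n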
1944 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
1945 Value *Arg = CI->getArgOperand(0);
1946 Value *Neg = Builder.CreateNeg(Arg, "neg");
1947 Value *Cmp = Builder.CreateICmpSGE(
1948 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
1949 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
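// Sketch of the emitted sequence for %r = call i32 @llvm.nvvm.abs.i(i32 %x):
//   %neg = sub i32 0, %x
//   %abs.cond = icmp sge i32 %x, 0
//   %r = select i1 %abs.cond, i32 %x, i32 %neg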
1950 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
1951 Name == "max.ui" || Name == "max.ull")) {
1952 Value *Arg0 = CI->getArgOperand(0);
1953 Value *Arg1 = CI->getArgOperand(1);
1954 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1955 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
1956 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
1957 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
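// E.g. %r = call i32 @llvm.nvvm.max.i(i32 %a, i32 %b) lowers to
//   %max.cond = icmp sge i32 %a, %b
//   %r = select i1 %max.cond, i32 %a, i32 %b
// with icmp uge for the unsigned variants; min below is symmetric.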
1958 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
1959 Name == "min.ui" || Name == "min.ull")) {
1960 Value *Arg0 = CI->getArgOperand(0);
1961 Value *Arg1 = CI->getArgOperand(1);
1962 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
1963 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
1964 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
1965 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
1966 } else if (IsNVVM && Name == "clz.ll") {
1967 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
1968 Value *Arg = CI->getArgOperand(0);
1969 Value *Ctlz = Builder.CreateCall(
1970 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
1971 {Arg->getType()}),
1972 {Arg, Builder.getFalse()}, "ctlz");
1973 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
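// Sketch: %r = call i32 @llvm.nvvm.clz.ll(i64 %x) becomes
//   %ctlz = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
//   %r = trunc i64 %ctlz to i32
// where the i1 false operand means the result is defined for zero input.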
1974 } else if (IsNVVM && Name == "popc.ll") {
1975 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
1976 // i64.
1977 Value *Arg = CI->getArgOperand(0);
1978 Value *Popc = Builder.CreateCall(
1979 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
1980 {Arg->getType()}),
1981 Arg, "ctpop");
1982 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
1983 } else if (IsNVVM && Name == "h2f") {
1984 Rep = Builder.CreateCall(Intrinsic::getDeclaration(
1985 F->getParent(), Intrinsic::convert_from_fp16,
1986 {Builder.getFloatTy()}),
1987 CI->getArgOperand(0), "h2f");
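// Sketch (assuming the usual i16 representation of a half value):
//   %r = call float @llvm.nvvm.h2f(i16 %h)
// becomes
//   %r = call float @llvm.convert.from.fp16.f32(i16 %h)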
1988 } else {
1989 llvm_unreachable("Unknown function for CallInst upgrade.");
1990 }
1991
1992 if (Rep)
1993 CI->replaceAllUsesWith(Rep);
1994 CI->eraseFromParent();
1995 return;
1996 }
1997
1998 CallInst *NewCall = nullptr;
1999 switch (NewFn->getIntrinsicID()) {
2000 default: {
2001 // Handle generic mangling change, but nothing else
2002 assert(
2003 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
2004 "Unknown function for CallInst upgrade and isn't just a name change");
2005 CI->setCalledFunction(NewFn);
2006 return;
2007 }
2008
2009 case Intrinsic::arm_neon_vld1:
2010 case Intrinsic::arm_neon_vld2:
2011 case Intrinsic::arm_neon_vld3:
2012 case Intrinsic::arm_neon_vld4:
2013 case Intrinsic::arm_neon_vld2lane:
2014 case Intrinsic::arm_neon_vld3lane:
2015 case Intrinsic::arm_neon_vld4lane:
2016 case Intrinsic::arm_neon_vst1:
2017 case Intrinsic::arm_neon_vst2:
2018 case Intrinsic::arm_neon_vst3:
2019 case Intrinsic::arm_neon_vst4:
2020 case Intrinsic::arm_neon_vst2lane:
2021 case Intrinsic::arm_neon_vst3lane:
2022 case Intrinsic::arm_neon_vst4lane: {
2023 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2024 CI->arg_operands().end());
2025 NewCall = Builder.CreateCall(NewFn, Args);
2026 break;
2027 }
2028
2029 case Intrinsic::bitreverse:
2030 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2031 break;
2032
2033 case Intrinsic::ctlz:
2034 case Intrinsic::cttz:
2035 assert(CI->getNumArgOperands() == 1 &&
2036 "Mismatch between function args and call args");
2037 NewCall =
2038 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
2039 break;
2040
2041 case Intrinsic::objectsize: {
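// The two-argument form gains an explicit null-is-unknown-size argument
// (false); a call that already has three arguments simply forwards its
// existing third operand.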
2042 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
2043 ? Builder.getFalse()
2044 : CI->getArgOperand(2);
2045 NewCall = Builder.CreateCall(
2046 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
2047 break;
2048 }
2049
2050 case Intrinsic::ctpop:
2051 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2052 break;
2053
2054 case Intrinsic::convert_from_fp16:
2055 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2056 break;
2057
2058 case Intrinsic::x86_xop_vfrcz_ss:
2059 case Intrinsic::x86_xop_vfrcz_sd:
2060 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
2061 break;
2062
2063 case Intrinsic::x86_xop_vpermil2pd:
2064 case Intrinsic::x86_xop_vpermil2ps:
2065 case Intrinsic::x86_xop_vpermil2pd_256:
2066 case Intrinsic::x86_xop_vpermil2ps_256: {
2067 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2068 CI->arg_operands().end());
2069 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
2070 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
2071 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
2072 NewCall = Builder.CreateCall(NewFn, Args);
2073 break;
2074 }
2075
2076 case Intrinsic::x86_sse41_ptestc:
2077 case Intrinsic::x86_sse41_ptestz:
2078 case Intrinsic::x86_sse41_ptestnzc: {
2079 // The arguments for these intrinsics used to be v4f32, and changed
2080 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
2081 // So, the only thing required is a bitcast for both arguments.
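// Sketch:
//   %r = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %b)
// becomes
//   %c0 = bitcast <4 x float> %a to <2 x i64>
//   %c1 = bitcast <4 x float> %b to <2 x i64>
//   %r = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c0, <2 x i64> %c1)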
2082 // First, check the arguments have the old type.
2083 Value *Arg0 = CI->getArgOperand(0);
2084 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
2085 return;
2086
2087 // Old intrinsic, add bitcasts
2088 Value *Arg1 = CI->getArgOperand(1);
2089
2090 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
2091
2092 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
2093 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2094
2095 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
2096 break;
2097 }
2098
2099 case Intrinsic::x86_sse41_insertps:
2100 case Intrinsic::x86_sse41_dppd:
2101 case Intrinsic::x86_sse41_dpps:
2102 case Intrinsic::x86_sse41_mpsadbw:
2103 case Intrinsic::x86_avx_dp_ps_256:
2104 case Intrinsic::x86_avx2_mpsadbw: {
2105 // Need to truncate the last argument from i32 to i8 -- this argument models
2106 // an inherently 8-bit immediate operand to these x86 instructions.
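// E.g. @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %b, i32 17)
// is rewritten so the final operand is i8 17.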
2107 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2108 CI->arg_operands().end());
2109
2110 // Replace the last argument with a trunc.
2111 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
2112 NewCall = Builder.CreateCall(NewFn, Args);
2113 break;
2114 }
2115
2116 case Intrinsic::thread_pointer: {
2117 NewCall = Builder.CreateCall(NewFn, {});
2118 break;
2119 }
2120
2121 case Intrinsic::invariant_start:
2122 case Intrinsic::invariant_end:
2123 case Intrinsic::masked_load:
2124 case Intrinsic::masked_store:
2125 case Intrinsic::masked_gather:
2126 case Intrinsic::masked_scatter: {
2127 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
2128 CI->arg_operands().end());
2129 NewCall = Builder.CreateCall(NewFn, Args);
2130 break;
2131 }
2132 }
2133 assert(NewCall && "Should have either set this variable or returned through "
2134 "the default case");
2135 std::string Name = CI->getName();
2136 if (!Name.empty()) {
2137 CI->setName(Name + ".old");
2138 NewCall->setName(Name);
2139 }
2140 CI->replaceAllUsesWith(NewCall);
2141 CI->eraseFromParent();
2142}
2143
2144void llvm::UpgradeCallsToIntrinsic(Function *F) {
2145 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
2146
2147 // Check if this function should be upgraded and get the replacement function
2148 // if there is one.
2149 Function *NewFn;
2150 if (UpgradeIntrinsicFunction(F, NewFn)) {
2151 // Replace all users of the old function with the new function or new
2152 // instructions. This is not a range loop because the call is deleted.
2153 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
2154 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
2155 UpgradeIntrinsicCall(CI, NewFn);
2156
2157 // Remove old function, no longer used, from the module.
2158 F->eraseFromParent();
2159 }
2160}
2161
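// For illustration: an old two-operand scalar tag !1 = !{!"int", !0}
// becomes the struct-path access tag !{!1, !1, i64 0}; a three-operand
// tag additionally carries its constant flag over as a fourth operand.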
2162MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
2163 // Check if the tag uses struct-path aware TBAA format.
2164 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
2165 return &MD;
2166
2167 auto &Context = MD.getContext();
2168 if (MD.getNumOperands() == 3) {
2169 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
2170 MDNode *ScalarType = MDNode::get(Context, Elts);
2171 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
2172 Metadata *Elts2[] = {ScalarType, ScalarType,
2173 ConstantAsMetadata::get(
2174 Constant::getNullValue(Type::getInt64Ty(Context))),
2175 MD.getOperand(2)};
2176 return MDNode::get(Context, Elts2);
2177 }
2178 // Create a MDNode <MD, MD, offset 0>
2179 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
2180 Type::getInt64Ty(Context)))};
2181 return MDNode::get(Context, Elts);
2182}
2183
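// Illustrative example: an address-space-changing pointer bitcast such as
//   %q = bitcast i8* %p to i8 addrspace(1)*
// is rewritten as a ptrtoint/inttoptr pair through i64:
//   %t = ptrtoint i8* %p to i64
//   %q = inttoptr i64 %t to i8 addrspace(1)*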
2184Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
2185 Instruction *&Temp) {
2186 if (Opc != Instruction::BitCast)
2187 return nullptr;
2188
2189 Temp = nullptr;
2190 Type *SrcTy = V->getType();
2191 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2192 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2193 LLVMContext &Context = V->getContext();
2194
2195 // We have no information about target data layout, so we assume that
2196 // the maximum pointer size is 64bit.
2197 Type *MidTy = Type::getInt64Ty(Context);
2198 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
2199
2200 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
2201 }
2202
2203 return nullptr;
2204}
2205
2206Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
2207 if (Opc != Instruction::BitCast)
2208 return nullptr;
2209
2210 Type *SrcTy = C->getType();
2211 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2212 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2213 LLVMContext &Context = C->getContext();
2214
2215 // We have no information about target data layout, so we assume that
2216 // the maximum pointer size is 64bit.
2217 Type *MidTy = Type::getInt64Ty(Context);
2218
2219 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
2220 DestTy);
2221 }
2222
2223 return nullptr;
2224}
2225
2226/// Check the debug info version number, if it is out-dated, drop the debug
2227/// info. Return true if module is modified.
2228bool llvm::UpgradeDebugInfo(Module &M) {
2229 unsigned Version = getDebugMetadataVersionFromModule(M);
2230 if (Version == DEBUG_METADATA_VERSION)
2231 return false;
2232
2233 bool RetCode = StripDebugInfo(M);
2234 if (RetCode) {
2235 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
2236 M.getContext().diagnose(DiagVersion);
2237 }
2238 return RetCode;
2239}
2240
2241bool llvm::UpgradeModuleFlags(Module &M) {
2242 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
2243 if (!ModFlags)
2244 return false;
2245
2246 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
2247 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
2248 MDNode *Op = ModFlags->getOperand(I);
2249 if (Op->getNumOperands() != 3)
2250 continue;
2251 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
2252 if (!ID)
2253 continue;
2254 if (ID->getString() == "Objective-C Image Info Version")
2255 HasObjCFlag = true;
2256 if (ID->getString() == "Objective-C Class Properties")
2257 HasClassProperties = true;
2258 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
2259 // flags was Error and is now Max.
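// E.g. !{i32 1, !"PIC Level", i32 2} is rewritten to
// !{i32 7, !"PIC Level", i32 2}, assuming the usual encoding of
// Module::Error as 1 and Module::Max as 7.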
2260 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
2261 if (auto *Behavior =
2262 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
2263 if (Behavior->getLimitedValue() == Module::Error) {
2264 Type *Int32Ty = Type::getInt32Ty(M.getContext());
2265 Metadata *Ops[3] = {
2266 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
2267 MDString::get(M.getContext(), ID->getString()),
2268 Op->getOperand(2)};
2269 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
2270 Changed = true;
2271 }
2272 }
2273 }
2274 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
2275 // section name so that llvm-lto will not complain about mismatched module
2276 // flags that are functionally the same.
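// E.g. "__DATA, __objc_imageinfo, regular, no_dead_strip" becomes
// "__DATA,__objc_imageinfo,regular,no_dead_strip".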
2277 if (ID->getString() == "Objective-C Image Info Section") {
2278 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
2279 SmallVector<StringRef, 4> ValueComp;
2280 Value->getString().split(ValueComp, " ");
2281 if (ValueComp.size() != 1) {
2282 std::string NewValue;
2283 for (auto &S : ValueComp)
2284 NewValue += S.str();
2285 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
2286 MDString::get(M.getContext(), NewValue)};
2287 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
2288 Changed = true;
2289 }
2290 }
2291 }
2292 }
2293
2294 // "Objective-C Class Properties" is recently added for Objective-C. We
2295 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
2296 // flag of value 0, so we can correclty downgrade this flag when trying to
2297 // link an ObjC bitcode without this module flag with an ObjC bitcode with
2298 // this module flag.
2299 if (HasObjCFlag && !HasClassProperties) {
2300 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
2301 (uint32_t)0);
2302 Changed = true;
2303 }
2304
2305 return Changed;
2306}
2307
2308void llvm::UpgradeSectionAttributes(Module &M) {
2309 auto TrimSpaces = [](StringRef Section) -> std::string {
2310 SmallVector<StringRef, 5> Components;
2311 Section.split(Components, ',');
2312
2313 SmallString<32> Buffer;
2314 raw_svector_ostream OS(Buffer);
2315
2316 for (auto Component : Components)
2317 OS << ',' << Component.trim();
2318
2319 return OS.str().substr(1);
2320 };
2321
2322 for (auto &GV : M.globals()) {
2323 if (!GV.hasSection())
2324 continue;
2325
2326 StringRef Section = GV.getSection();
2327
2328 if (!Section.startswith("__DATA, __objc_catlist"))
2329 continue;
2330
2331 // __DATA, __objc_catlist, regular, no_dead_strip
2332 // __DATA,__objc_catlist,regular,no_dead_strip
2333 GV.setSection(TrimSpaces(Section));
2334 }
2335}
2336
2337static bool isOldLoopArgument(Metadata *MD) {
2338 auto *T = dyn_cast_or_null<MDTuple>(MD);
2339 if (!T)
2340 return false;
2341 if (T->getNumOperands() < 1)
2342 return false;
2343 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
2344 if (!S)
2345 return false;
2346 return S->getString().startswith("llvm.vectorizer.");
2347}
2348
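// E.g. "llvm.vectorizer.width" maps to "llvm.loop.vectorize.width";
// the lone special case "llvm.vectorizer.unroll" maps to
// "llvm.loop.interleave.count".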
2349static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
2350 StringRef OldPrefix = "llvm.vectorizer.";
2351 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
2352
2353 if (OldTag == "llvm.vectorizer.unroll")
2354 return MDString::get(C, "llvm.loop.interleave.count");
2355
2356 return MDString::get(
2357 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
2358 .str());
2359}
2360
2361static Metadata *upgradeLoopArgument(Metadata *MD) {
2362 auto *T = dyn_cast_or_null<MDTuple>(MD);
2363 if (!T)
2364 return MD;
2365 if (T->getNumOperands() < 1)
2366 return MD;
2367 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2368 if (!OldTag)
2369 return MD;
2370 if (!OldTag->getString().startswith("llvm.vectorizer."))
2371 return MD;
2372
2373 // This has an old tag. Upgrade it.
2374 SmallVector<Metadata *, 8> Ops;
2375 Ops.reserve(T->getNumOperands());
2376 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
2377 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
2378 Ops.push_back(T->getOperand(I));
2379
2380 return MDTuple::get(T->getContext(), Ops);
2381}
2382
2383MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
2384 auto *T = dyn_cast<MDTuple>(&N);
2385 if (!T)
2386 return &N;
2387
2388 if (none_of(T->operands(), isOldLoopArgument))
2389 return &N;
2390
2391 SmallVector<Metadata *, 8> Ops;
2392 Ops.reserve(T->getNumOperands());
2393 for (Metadata *MD : T->operands())
2394 Ops.push_back(upgradeLoopArgument(MD));
2395
2396 return MDTuple::get(T->getContext(), Ops);
2397}