1249259Sdim//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2249259Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6249259Sdim// 7249259Sdim//===----------------------------------------------------------------------===// 8249259Sdim// 9288943Sdim// This file implements the auto-upgrade helper functions. 10288943Sdim// This is where deprecated IR intrinsics and other IR features are updated to 11288943Sdim// current specifications. 12249259Sdim// 13249259Sdim//===----------------------------------------------------------------------===// 14249259Sdim 15276479Sdim#include "llvm/IR/AutoUpgrade.h" 16321369Sdim#include "llvm/ADT/StringSwitch.h" 17249259Sdim#include "llvm/IR/Constants.h" 18280031Sdim#include "llvm/IR/DIBuilder.h" 19276479Sdim#include "llvm/IR/DebugInfo.h" 20276479Sdim#include "llvm/IR/DiagnosticInfo.h" 21249259Sdim#include "llvm/IR/Function.h" 22249259Sdim#include "llvm/IR/IRBuilder.h" 23249259Sdim#include "llvm/IR/Instruction.h" 24341825Sdim#include "llvm/IR/IntrinsicInst.h" 25360784Sdim#include "llvm/IR/IntrinsicsAArch64.h" 26360784Sdim#include "llvm/IR/IntrinsicsARM.h" 27360784Sdim#include "llvm/IR/IntrinsicsX86.h" 28249259Sdim#include "llvm/IR/LLVMContext.h" 29249259Sdim#include "llvm/IR/Module.h" 30327952Sdim#include "llvm/IR/Verifier.h" 31249259Sdim#include "llvm/Support/ErrorHandling.h" 32296417Sdim#include "llvm/Support/Regex.h" 33249259Sdim#include <cstring> 34249259Sdimusing namespace llvm; 35249259Sdim 36314564Sdimstatic void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); } 37314564Sdim 38321369Sdim// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have 39249259Sdim// changed their type from v4f32 to v2i64. 
40321369Sdimstatic bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID, 41321369Sdim Function *&NewFn) { 42249259Sdim // Check whether this is an old version of the function, which received 43249259Sdim // v4f32 arguments. 44249259Sdim Type *Arg0Type = F->getFunctionType()->getParamType(0); 45249259Sdim if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 46249259Sdim return false; 47249259Sdim 48249259Sdim // Yes, it's old, replace it with new version. 49314564Sdim rename(F); 50249259Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 51249259Sdim return true; 52249259Sdim} 53249259Sdim 54280031Sdim// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 55280031Sdim// arguments have changed their type from i32 to i8. 56280031Sdimstatic bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, 57280031Sdim Function *&NewFn) { 58280031Sdim // Check that the last argument is an i32. 59280031Sdim Type *LastArgType = F->getFunctionType()->getParamType( 60280031Sdim F->getFunctionType()->getNumParams() - 1); 61280031Sdim if (!LastArgType->isIntegerTy(32)) 62280031Sdim return false; 63280031Sdim 64280031Sdim // Move this function aside and map down. 65314564Sdim rename(F); 66280031Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 67280031Sdim return true; 68280031Sdim} 69280031Sdim 70321369Sdimstatic bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { 71321369Sdim // All of the intrinsics matches below should be marked with which llvm 72321369Sdim // version started autoupgrading them. At some point in the future we would 73321369Sdim // like to use this information to remove upgrade code for some older 74321369Sdim // intrinsics. It is currently undecided how we will determine that future 75321369Sdim // point. 
76344779Sdim if (Name == "addcarryx.u32" || // Added in 8.0 77344779Sdim Name == "addcarryx.u64" || // Added in 8.0 78344779Sdim Name == "addcarry.u32" || // Added in 8.0 79344779Sdim Name == "addcarry.u64" || // Added in 8.0 80344779Sdim Name == "subborrow.u32" || // Added in 8.0 81344779Sdim Name == "subborrow.u64" || // Added in 8.0 82344779Sdim Name.startswith("sse2.padds.") || // Added in 8.0 83344779Sdim Name.startswith("sse2.psubs.") || // Added in 8.0 84344779Sdim Name.startswith("sse2.paddus.") || // Added in 8.0 85344779Sdim Name.startswith("sse2.psubus.") || // Added in 8.0 86344779Sdim Name.startswith("avx2.padds.") || // Added in 8.0 87344779Sdim Name.startswith("avx2.psubs.") || // Added in 8.0 88344779Sdim Name.startswith("avx2.paddus.") || // Added in 8.0 89344779Sdim Name.startswith("avx2.psubus.") || // Added in 8.0 90344779Sdim Name.startswith("avx512.padds.") || // Added in 8.0 91344779Sdim Name.startswith("avx512.psubs.") || // Added in 8.0 92344779Sdim Name.startswith("avx512.mask.padds.") || // Added in 8.0 93344779Sdim Name.startswith("avx512.mask.psubs.") || // Added in 8.0 94344779Sdim Name.startswith("avx512.mask.paddus.") || // Added in 8.0 95344779Sdim Name.startswith("avx512.mask.psubus.") || // Added in 8.0 96344779Sdim Name=="ssse3.pabs.b.128" || // Added in 6.0 97327952Sdim Name=="ssse3.pabs.w.128" || // Added in 6.0 98327952Sdim Name=="ssse3.pabs.d.128" || // Added in 6.0 99341825Sdim Name.startswith("fma4.vfmadd.s") || // Added in 7.0 100341825Sdim Name.startswith("fma.vfmadd.") || // Added in 7.0 101341825Sdim Name.startswith("fma.vfmsub.") || // Added in 7.0 102341825Sdim Name.startswith("fma.vfmaddsub.") || // Added in 7.0 103341825Sdim Name.startswith("fma.vfmsubadd.") || // Added in 7.0 104341825Sdim Name.startswith("fma.vfnmadd.") || // Added in 7.0 105341825Sdim Name.startswith("fma.vfnmsub.") || // Added in 7.0 106341825Sdim Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0 107341825Sdim 
Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0 108341825Sdim Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0 109341825Sdim Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0 110341825Sdim Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0 111341825Sdim Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0 112341825Sdim Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0 113341825Sdim Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0 114341825Sdim Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0 115341825Sdim Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0 116341825Sdim Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0 117327952Sdim Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 118327952Sdim Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 119341825Sdim Name.startswith("avx512.kunpck") || //added in 6.0 120327952Sdim Name.startswith("avx2.pabs.") || // Added in 6.0 121327952Sdim Name.startswith("avx512.mask.pabs.") || // Added in 6.0 122327952Sdim Name.startswith("avx512.broadcastm") || // Added in 6.0 123341825Sdim Name == "sse.sqrt.ss" || // Added in 7.0 124341825Sdim Name == "sse2.sqrt.sd" || // Added in 7.0 125341825Sdim Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0 126341825Sdim Name.startswith("avx.sqrt.p") || // Added in 7.0 127341825Sdim Name.startswith("sse2.sqrt.p") || // Added in 7.0 128341825Sdim Name.startswith("sse.sqrt.p") || // Added in 7.0 129327952Sdim Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0 130327952Sdim Name.startswith("sse2.pcmpeq.") || // Added in 3.1 131321369Sdim Name.startswith("sse2.pcmpgt.") || // Added in 3.1 132321369Sdim Name.startswith("avx2.pcmpeq.") || // Added in 3.1 133321369Sdim Name.startswith("avx2.pcmpgt.") || // Added in 3.1 134321369Sdim Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9 135321369Sdim Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9 
136327952Sdim Name.startswith("avx.vperm2f128.") || // Added in 6.0 137327952Sdim Name == "avx2.vperm2i128" || // Added in 6.0 138321369Sdim Name == "sse.add.ss" || // Added in 4.0 139321369Sdim Name == "sse2.add.sd" || // Added in 4.0 140321369Sdim Name == "sse.sub.ss" || // Added in 4.0 141321369Sdim Name == "sse2.sub.sd" || // Added in 4.0 142321369Sdim Name == "sse.mul.ss" || // Added in 4.0 143321369Sdim Name == "sse2.mul.sd" || // Added in 4.0 144321369Sdim Name == "sse.div.ss" || // Added in 4.0 145321369Sdim Name == "sse2.div.sd" || // Added in 4.0 146321369Sdim Name == "sse41.pmaxsb" || // Added in 3.9 147321369Sdim Name == "sse2.pmaxs.w" || // Added in 3.9 148321369Sdim Name == "sse41.pmaxsd" || // Added in 3.9 149321369Sdim Name == "sse2.pmaxu.b" || // Added in 3.9 150321369Sdim Name == "sse41.pmaxuw" || // Added in 3.9 151321369Sdim Name == "sse41.pmaxud" || // Added in 3.9 152321369Sdim Name == "sse41.pminsb" || // Added in 3.9 153321369Sdim Name == "sse2.pmins.w" || // Added in 3.9 154321369Sdim Name == "sse41.pminsd" || // Added in 3.9 155321369Sdim Name == "sse2.pminu.b" || // Added in 3.9 156321369Sdim Name == "sse41.pminuw" || // Added in 3.9 157321369Sdim Name == "sse41.pminud" || // Added in 3.9 158341825Sdim Name == "avx512.kand.w" || // Added in 7.0 159341825Sdim Name == "avx512.kandn.w" || // Added in 7.0 160341825Sdim Name == "avx512.knot.w" || // Added in 7.0 161341825Sdim Name == "avx512.kor.w" || // Added in 7.0 162341825Sdim Name == "avx512.kxor.w" || // Added in 7.0 163341825Sdim Name == "avx512.kxnor.w" || // Added in 7.0 164341825Sdim Name == "avx512.kortestc.w" || // Added in 7.0 165341825Sdim Name == "avx512.kortestz.w" || // Added in 7.0 166321369Sdim Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0 167321369Sdim Name.startswith("avx2.pmax") || // Added in 3.9 168321369Sdim Name.startswith("avx2.pmin") || // Added in 3.9 169321369Sdim Name.startswith("avx512.mask.pmax") || // Added in 4.0 170321369Sdim 
Name.startswith("avx512.mask.pmin") || // Added in 4.0 171321369Sdim Name.startswith("avx2.vbroadcast") || // Added in 3.8 172321369Sdim Name.startswith("avx2.pbroadcast") || // Added in 3.8 173321369Sdim Name.startswith("avx.vpermil.") || // Added in 3.1 174321369Sdim Name.startswith("sse2.pshuf") || // Added in 3.9 175321369Sdim Name.startswith("avx512.pbroadcast") || // Added in 3.9 176321369Sdim Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9 177321369Sdim Name.startswith("avx512.mask.movddup") || // Added in 3.9 178321369Sdim Name.startswith("avx512.mask.movshdup") || // Added in 3.9 179321369Sdim Name.startswith("avx512.mask.movsldup") || // Added in 3.9 180321369Sdim Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9 181321369Sdim Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9 182321369Sdim Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9 183321369Sdim Name.startswith("avx512.mask.shuf.p") || // Added in 4.0 184321369Sdim Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9 185321369Sdim Name.startswith("avx512.mask.perm.df.") || // Added in 3.9 186321369Sdim Name.startswith("avx512.mask.perm.di.") || // Added in 3.9 187321369Sdim Name.startswith("avx512.mask.punpckl") || // Added in 3.9 188321369Sdim Name.startswith("avx512.mask.punpckh") || // Added in 3.9 189321369Sdim Name.startswith("avx512.mask.unpckl.") || // Added in 3.9 190321369Sdim Name.startswith("avx512.mask.unpckh.") || // Added in 3.9 191321369Sdim Name.startswith("avx512.mask.pand.") || // Added in 3.9 192321369Sdim Name.startswith("avx512.mask.pandn.") || // Added in 3.9 193321369Sdim Name.startswith("avx512.mask.por.") || // Added in 3.9 194321369Sdim Name.startswith("avx512.mask.pxor.") || // Added in 3.9 195321369Sdim Name.startswith("avx512.mask.and.") || // Added in 3.9 196321369Sdim Name.startswith("avx512.mask.andn.") || // Added in 3.9 197321369Sdim Name.startswith("avx512.mask.or.") || // Added in 3.9 198321369Sdim 
Name.startswith("avx512.mask.xor.") || // Added in 3.9 199321369Sdim Name.startswith("avx512.mask.padd.") || // Added in 4.0 200321369Sdim Name.startswith("avx512.mask.psub.") || // Added in 4.0 201321369Sdim Name.startswith("avx512.mask.pmull.") || // Added in 4.0 202321369Sdim Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 203321369Sdim Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 204353358Sdim Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0 205353358Sdim Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0 206353358Sdim Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0 207353358Sdim Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0 208353358Sdim Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0 209353358Sdim Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0 210353358Sdim Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0 211353358Sdim Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0 212341825Sdim Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0 213341825Sdim Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0 214341825Sdim Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0 215341825Sdim Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0 216341825Sdim Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0 217341825Sdim Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0 218341825Sdim Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0 219341825Sdim Name == "avx512.cvtusi2sd" || // Added in 7.0 220341825Sdim Name.startswith("avx512.mask.permvar.") || // Added in 7.0 221341825Sdim Name == "sse2.pmulu.dq" || // Added in 7.0 222341825Sdim Name == "sse41.pmuldq" || // Added in 7.0 223341825Sdim Name == "avx2.pmulu.dq" || // Added in 7.0 224341825Sdim Name == "avx2.pmul.dq" || // Added in 7.0 225341825Sdim Name == "avx512.pmulu.dq.512" || // Added in 7.0 226341825Sdim Name == "avx512.pmul.dq.512" || // Added in 7.0 
227321369Sdim Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0 228321369Sdim Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0 229341825Sdim Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0 230341825Sdim Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0 231341825Sdim Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0 232341825Sdim Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0 233341825Sdim Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0 234321369Sdim Name.startswith("avx512.mask.packsswb.") || // Added in 5.0 235321369Sdim Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 236321369Sdim Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 237321369Sdim Name.startswith("avx512.mask.packusdw.") || // Added in 5.0 238321369Sdim Name.startswith("avx512.mask.cmp.b") || // Added in 5.0 239321369Sdim Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 240321369Sdim Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 241321369Sdim Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 242341825Sdim Name.startswith("avx512.mask.cmp.p") || // Added in 7.0 243321369Sdim Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 244341825Sdim Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 245341825Sdim Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 246341825Sdim Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 247341825Sdim Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 248321369Sdim Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0 249321369Sdim Name.startswith("avx512.mask.psll.d") || // Added in 4.0 250321369Sdim Name.startswith("avx512.mask.psll.q") || // Added in 4.0 251321369Sdim Name.startswith("avx512.mask.psll.w") || // Added in 4.0 252321369Sdim Name.startswith("avx512.mask.psra.d") || // Added in 4.0 253321369Sdim Name.startswith("avx512.mask.psra.q") || // Added in 4.0 254321369Sdim Name.startswith("avx512.mask.psra.w") 
|| // Added in 4.0 255321369Sdim Name.startswith("avx512.mask.psrl.d") || // Added in 4.0 256321369Sdim Name.startswith("avx512.mask.psrl.q") || // Added in 4.0 257321369Sdim Name.startswith("avx512.mask.psrl.w") || // Added in 4.0 258321369Sdim Name.startswith("avx512.mask.pslli") || // Added in 4.0 259321369Sdim Name.startswith("avx512.mask.psrai") || // Added in 4.0 260321369Sdim Name.startswith("avx512.mask.psrli") || // Added in 4.0 261321369Sdim Name.startswith("avx512.mask.psllv") || // Added in 4.0 262321369Sdim Name.startswith("avx512.mask.psrav") || // Added in 4.0 263321369Sdim Name.startswith("avx512.mask.psrlv") || // Added in 4.0 264321369Sdim Name.startswith("sse41.pmovsx") || // Added in 3.8 265321369Sdim Name.startswith("sse41.pmovzx") || // Added in 3.9 266321369Sdim Name.startswith("avx2.pmovsx") || // Added in 3.9 267321369Sdim Name.startswith("avx2.pmovzx") || // Added in 3.9 268321369Sdim Name.startswith("avx512.mask.pmovsx") || // Added in 4.0 269321369Sdim Name.startswith("avx512.mask.pmovzx") || // Added in 4.0 270321369Sdim Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0 271341825Sdim Name.startswith("avx512.mask.pternlog.") || // Added in 7.0 272341825Sdim Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0 273341825Sdim Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0 274341825Sdim Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0 275341825Sdim Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0 276341825Sdim Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0 277341825Sdim Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0 278341825Sdim Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0 279341825Sdim Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0 280341825Sdim Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0 281341825Sdim Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0 282341825Sdim 
Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0 283341825Sdim Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0 284341825Sdim Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0 285341825Sdim Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0 286341825Sdim Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0 287341825Sdim Name.startswith("avx512.mask.vpshld.") || // Added in 7.0 288341825Sdim Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0 289344779Sdim Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0 290344779Sdim Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0 291344779Sdim Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0 292344779Sdim Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0 293344779Sdim Name.startswith("avx512.vpshld.") || // Added in 8.0 294344779Sdim Name.startswith("avx512.vpshrd.") || // Added in 8.0 295341825Sdim Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0 296341825Sdim Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0 297341825Sdim Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0 298341825Sdim Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0 299341825Sdim Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0 300341825Sdim Name.startswith("avx512.mask.min.p") || // Added in 7.0. 
128/256 in 5.0 301341825Sdim Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0 302344779Sdim Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0 303344779Sdim Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0 304353358Sdim Name.startswith("avx512.mask.conflict.") || // Added in 9.0 305353358Sdim Name == "avx512.mask.pmov.qd.256" || // Added in 9.0 306353358Sdim Name == "avx512.mask.pmov.qd.512" || // Added in 9.0 307353358Sdim Name == "avx512.mask.pmov.wb.256" || // Added in 9.0 308353358Sdim Name == "avx512.mask.pmov.wb.512" || // Added in 9.0 309341825Sdim Name == "sse.cvtsi2ss" || // Added in 7.0 310341825Sdim Name == "sse.cvtsi642ss" || // Added in 7.0 311341825Sdim Name == "sse2.cvtsi2sd" || // Added in 7.0 312341825Sdim Name == "sse2.cvtsi642sd" || // Added in 7.0 313341825Sdim Name == "sse2.cvtss2sd" || // Added in 7.0 314321369Sdim Name == "sse2.cvtdq2pd" || // Added in 3.9 315341825Sdim Name == "sse2.cvtdq2ps" || // Added in 7.0 316321369Sdim Name == "sse2.cvtps2pd" || // Added in 3.9 317321369Sdim Name == "avx.cvtdq2.pd.256" || // Added in 3.9 318341825Sdim Name == "avx.cvtdq2.ps.256" || // Added in 7.0 319321369Sdim Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 320321369Sdim Name.startswith("avx.vinsertf128.") || // Added in 3.7 321321369Sdim Name == "avx2.vinserti128" || // Added in 3.7 322321369Sdim Name.startswith("avx512.mask.insert") || // Added in 4.0 323321369Sdim Name.startswith("avx.vextractf128.") || // Added in 3.7 324321369Sdim Name == "avx2.vextracti128" || // Added in 3.7 325321369Sdim Name.startswith("avx512.mask.vextract") || // Added in 4.0 326321369Sdim Name.startswith("sse4a.movnt.") || // Added in 3.9 327321369Sdim Name.startswith("avx.movnt.") || // Added in 3.2 328321369Sdim Name.startswith("avx512.storent.") || // Added in 3.9 329321369Sdim Name == "sse41.movntdqa" || // Added in 5.0 330321369Sdim Name == "avx2.movntdqa" || // Added in 5.0 331321369Sdim Name == "avx512.movntdqa" || // Added 
in 5.0 332321369Sdim Name == "sse2.storel.dq" || // Added in 3.9 333321369Sdim Name.startswith("sse.storeu.") || // Added in 3.9 334321369Sdim Name.startswith("sse2.storeu.") || // Added in 3.9 335321369Sdim Name.startswith("avx.storeu.") || // Added in 3.9 336321369Sdim Name.startswith("avx512.mask.storeu.") || // Added in 3.9 337321369Sdim Name.startswith("avx512.mask.store.p") || // Added in 3.9 338321369Sdim Name.startswith("avx512.mask.store.b.") || // Added in 3.9 339321369Sdim Name.startswith("avx512.mask.store.w.") || // Added in 3.9 340321369Sdim Name.startswith("avx512.mask.store.d.") || // Added in 3.9 341321369Sdim Name.startswith("avx512.mask.store.q.") || // Added in 3.9 342341825Sdim Name == "avx512.mask.store.ss" || // Added in 7.0 343321369Sdim Name.startswith("avx512.mask.loadu.") || // Added in 3.9 344321369Sdim Name.startswith("avx512.mask.load.") || // Added in 3.9 345341825Sdim Name.startswith("avx512.mask.expand.load.") || // Added in 7.0 346341825Sdim Name.startswith("avx512.mask.compress.store.") || // Added in 7.0 347353358Sdim Name.startswith("avx512.mask.expand.b") || // Added in 9.0 348353358Sdim Name.startswith("avx512.mask.expand.w") || // Added in 9.0 349353358Sdim Name.startswith("avx512.mask.expand.d") || // Added in 9.0 350353358Sdim Name.startswith("avx512.mask.expand.q") || // Added in 9.0 351353358Sdim Name.startswith("avx512.mask.expand.p") || // Added in 9.0 352353358Sdim Name.startswith("avx512.mask.compress.b") || // Added in 9.0 353353358Sdim Name.startswith("avx512.mask.compress.w") || // Added in 9.0 354353358Sdim Name.startswith("avx512.mask.compress.d") || // Added in 9.0 355353358Sdim Name.startswith("avx512.mask.compress.q") || // Added in 9.0 356353358Sdim Name.startswith("avx512.mask.compress.p") || // Added in 9.0 357321369Sdim Name == "sse42.crc32.64.8" || // Added in 3.4 358321369Sdim Name.startswith("avx.vbroadcast.s") || // Added in 3.5 359341825Sdim Name.startswith("avx512.vbroadcast.s") || // Added in 7.0 
360321369Sdim Name.startswith("avx512.mask.palignr.") || // Added in 3.9 361321369Sdim Name.startswith("avx512.mask.valign.") || // Added in 4.0 362321369Sdim Name.startswith("sse2.psll.dq") || // Added in 3.7 363321369Sdim Name.startswith("sse2.psrl.dq") || // Added in 3.7 364321369Sdim Name.startswith("avx2.psll.dq") || // Added in 3.7 365321369Sdim Name.startswith("avx2.psrl.dq") || // Added in 3.7 366321369Sdim Name.startswith("avx512.psll.dq") || // Added in 3.9 367321369Sdim Name.startswith("avx512.psrl.dq") || // Added in 3.9 368321369Sdim Name == "sse41.pblendw" || // Added in 3.7 369321369Sdim Name.startswith("sse41.blendp") || // Added in 3.7 370321369Sdim Name.startswith("avx.blend.p") || // Added in 3.7 371321369Sdim Name == "avx2.pblendw" || // Added in 3.7 372321369Sdim Name.startswith("avx2.pblendd.") || // Added in 3.7 373321369Sdim Name.startswith("avx.vbroadcastf128") || // Added in 4.0 374321369Sdim Name == "avx2.vbroadcasti128" || // Added in 3.7 375327952Sdim Name.startswith("avx512.mask.broadcastf") || // Added in 6.0 376327952Sdim Name.startswith("avx512.mask.broadcasti") || // Added in 6.0 377321369Sdim Name == "xop.vpcmov" || // Added in 3.8 378321369Sdim Name == "xop.vpcmov.256" || // Added in 5.0 379321369Sdim Name.startswith("avx512.mask.move.s") || // Added in 4.0 380321369Sdim Name.startswith("avx512.cvtmask2") || // Added in 5.0 381353358Sdim Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0 382344779Sdim Name.startswith("xop.vprot") || // Added in 8.0 383344779Sdim Name.startswith("avx512.prol") || // Added in 8.0 384344779Sdim Name.startswith("avx512.pror") || // Added in 8.0 385344779Sdim Name.startswith("avx512.mask.prorv.") || // Added in 8.0 386344779Sdim Name.startswith("avx512.mask.pror.") || // Added in 8.0 387344779Sdim Name.startswith("avx512.mask.prolv.") || // Added in 8.0 388344779Sdim Name.startswith("avx512.mask.prol.") || // Added in 8.0 389327952Sdim Name.startswith("avx512.ptestm") || //Added in 6.0 
390327952Sdim Name.startswith("avx512.ptestnm") || //Added in 6.0 391327952Sdim Name.startswith("avx512.mask.pavg")) // Added in 6.0 392321369Sdim return true; 393321369Sdim 394321369Sdim return false; 395321369Sdim} 396321369Sdim 397321369Sdimstatic bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, 398321369Sdim Function *&NewFn) { 399321369Sdim // Only handle intrinsics that start with "x86.". 400321369Sdim if (!Name.startswith("x86.")) 401321369Sdim return false; 402321369Sdim // Remove "x86." prefix. 403321369Sdim Name = Name.substr(4); 404321369Sdim 405321369Sdim if (ShouldUpgradeX86Intrinsic(F, Name)) { 406321369Sdim NewFn = nullptr; 407321369Sdim return true; 408321369Sdim } 409321369Sdim 410344779Sdim if (Name == "rdtscp") { // Added in 8.0 411344779Sdim // If this intrinsic has 0 operands, it's the new version. 412344779Sdim if (F->getFunctionType()->getNumParams() == 0) 413344779Sdim return false; 414344779Sdim 415344779Sdim rename(F); 416344779Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 417344779Sdim Intrinsic::x86_rdtscp); 418344779Sdim return true; 419344779Sdim } 420344779Sdim 421321369Sdim // SSE4.1 ptest functions may have an old signature. 422321369Sdim if (Name.startswith("sse41.ptest")) { // Added in 3.2 423321369Sdim if (Name.substr(11) == "c") 424321369Sdim return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn); 425321369Sdim if (Name.substr(11) == "z") 426321369Sdim return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn); 427321369Sdim if (Name.substr(11) == "nzc") 428321369Sdim return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn); 429321369Sdim } 430321369Sdim // Several blend and other instructions with masks used the wrong number of 431321369Sdim // bits. 
432321369Sdim if (Name == "sse41.insertps") // Added in 3.6 433321369Sdim return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, 434321369Sdim NewFn); 435321369Sdim if (Name == "sse41.dppd") // Added in 3.6 436321369Sdim return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, 437321369Sdim NewFn); 438321369Sdim if (Name == "sse41.dpps") // Added in 3.6 439321369Sdim return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, 440321369Sdim NewFn); 441321369Sdim if (Name == "sse41.mpsadbw") // Added in 3.6 442321369Sdim return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, 443321369Sdim NewFn); 444321369Sdim if (Name == "avx.dp.ps.256") // Added in 3.6 445321369Sdim return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 446321369Sdim NewFn); 447321369Sdim if (Name == "avx2.mpsadbw") // Added in 3.6 448321369Sdim return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 449321369Sdim NewFn); 450321369Sdim 451321369Sdim // frcz.ss/sd may need to have an argument dropped. Added in 3.2 452321369Sdim if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { 453321369Sdim rename(F); 454321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 455321369Sdim Intrinsic::x86_xop_vfrcz_ss); 456321369Sdim return true; 457321369Sdim } 458321369Sdim if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) { 459321369Sdim rename(F); 460321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 461321369Sdim Intrinsic::x86_xop_vfrcz_sd); 462321369Sdim return true; 463321369Sdim } 464321369Sdim // Upgrade any XOP PERMIL2 index operand still using a float/double vector. 
465321369Sdim if (Name.startswith("xop.vpermil2")) { // Added in 3.9 466321369Sdim auto Idx = F->getFunctionType()->getParamType(2); 467321369Sdim if (Idx->isFPOrFPVectorTy()) { 468321369Sdim rename(F); 469321369Sdim unsigned IdxSize = Idx->getPrimitiveSizeInBits(); 470321369Sdim unsigned EltSize = Idx->getScalarSizeInBits(); 471321369Sdim Intrinsic::ID Permil2ID; 472321369Sdim if (EltSize == 64 && IdxSize == 128) 473321369Sdim Permil2ID = Intrinsic::x86_xop_vpermil2pd; 474321369Sdim else if (EltSize == 32 && IdxSize == 128) 475321369Sdim Permil2ID = Intrinsic::x86_xop_vpermil2ps; 476321369Sdim else if (EltSize == 64 && IdxSize == 256) 477321369Sdim Permil2ID = Intrinsic::x86_xop_vpermil2pd_256; 478321369Sdim else 479321369Sdim Permil2ID = Intrinsic::x86_xop_vpermil2ps_256; 480321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID); 481321369Sdim return true; 482321369Sdim } 483321369Sdim } 484321369Sdim 485344779Sdim if (Name == "seh.recoverfp") { 486344779Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp); 487344779Sdim return true; 488344779Sdim } 489344779Sdim 490321369Sdim return false; 491321369Sdim} 492321369Sdim 493249259Sdimstatic bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 494249259Sdim assert(F && "Illegal to upgrade a non-existent Function."); 495249259Sdim 496249259Sdim // Quickly eliminate it, if it's not a candidate. 497249259Sdim StringRef Name = F->getName(); 498249259Sdim if (Name.size() <= 8 || !Name.startswith("llvm.")) 499249259Sdim return false; 500249259Sdim Name = Name.substr(5); // Strip off "llvm." 
501249259Sdim 502249259Sdim switch (Name[0]) { 503249259Sdim default: break; 504249259Sdim case 'a': { 505314564Sdim if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) { 506314564Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, 507314564Sdim F->arg_begin()->getType()); 508314564Sdim return true; 509314564Sdim } 510249259Sdim if (Name.startswith("arm.neon.vclz")) { 511249259Sdim Type* args[2] = { 512261991Sdim F->arg_begin()->getType(), 513249259Sdim Type::getInt1Ty(F->getContext()) 514249259Sdim }; 515249259Sdim // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to 516249259Sdim // the end of the name. Change name from llvm.arm.neon.vclz.* to 517249259Sdim // llvm.ctlz.* 518249259Sdim FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); 519344779Sdim NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), 520249259Sdim "llvm.ctlz." + Name.substr(14), F->getParent()); 521249259Sdim return true; 522249259Sdim } 523249259Sdim if (Name.startswith("arm.neon.vcnt")) { 524249259Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 525249259Sdim F->arg_begin()->getType()); 526249259Sdim return true; 527249259Sdim } 528360784Sdim static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); 529296417Sdim if (vldRegex.match(Name)) { 530296417Sdim auto fArgs = F->getFunctionType()->params(); 531296417Sdim SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); 532296417Sdim // Can't use Intrinsic::getDeclaration here as the return types might 533296417Sdim // then only be structurally equal. 534296417Sdim FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); 535344779Sdim NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), 536296417Sdim "llvm." 
+ Name + ".p0i8", F->getParent()); 537296417Sdim return true; 538296417Sdim } 539360784Sdim static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); 540296417Sdim if (vstRegex.match(Name)) { 541296417Sdim static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, 542296417Sdim Intrinsic::arm_neon_vst2, 543296417Sdim Intrinsic::arm_neon_vst3, 544296417Sdim Intrinsic::arm_neon_vst4}; 545296417Sdim 546296417Sdim static const Intrinsic::ID StoreLaneInts[] = { 547296417Sdim Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, 548296417Sdim Intrinsic::arm_neon_vst4lane 549296417Sdim }; 550296417Sdim 551296417Sdim auto fArgs = F->getFunctionType()->params(); 552296417Sdim Type *Tys[] = {fArgs[0], fArgs[1]}; 553296417Sdim if (Name.find("lane") == StringRef::npos) 554296417Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 555296417Sdim StoreInts[fArgs.size() - 3], Tys); 556296417Sdim else 557296417Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 558296417Sdim StoreLaneInts[fArgs.size() - 5], Tys); 559296417Sdim return true; 560296417Sdim } 561309124Sdim if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { 562309124Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); 563309124Sdim return true; 564309124Sdim } 565360784Sdim if (Name.startswith("arm.neon.vqadds.")) { 566360784Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat, 567360784Sdim F->arg_begin()->getType()); 568360784Sdim return true; 569360784Sdim } 570360784Sdim if (Name.startswith("arm.neon.vqaddu.")) { 571360784Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat, 572360784Sdim F->arg_begin()->getType()); 573360784Sdim return true; 574360784Sdim } 575360784Sdim if (Name.startswith("arm.neon.vqsubs.")) { 576360784Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat, 577360784Sdim F->arg_begin()->getType()); 578360784Sdim return true; 579360784Sdim 
} 580360784Sdim if (Name.startswith("arm.neon.vqsubu.")) { 581360784Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat, 582360784Sdim F->arg_begin()->getType()); 583360784Sdim return true; 584360784Sdim } 585353358Sdim if (Name.startswith("aarch64.neon.addp")) { 586353358Sdim if (F->arg_size() != 2) 587353358Sdim break; // Invalid IR. 588360784Sdim VectorType *Ty = dyn_cast<VectorType>(F->getReturnType()); 589360784Sdim if (Ty && Ty->getElementType()->isFloatingPointTy()) { 590353358Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 591360784Sdim Intrinsic::aarch64_neon_faddp, Ty); 592353358Sdim return true; 593353358Sdim } 594353358Sdim } 595249259Sdim break; 596249259Sdim } 597296417Sdim 598249259Sdim case 'c': { 599249259Sdim if (Name.startswith("ctlz.") && F->arg_size() == 1) { 600314564Sdim rename(F); 601249259Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 602249259Sdim F->arg_begin()->getType()); 603249259Sdim return true; 604249259Sdim } 605249259Sdim if (Name.startswith("cttz.") && F->arg_size() == 1) { 606314564Sdim rename(F); 607249259Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, 608249259Sdim F->arg_begin()->getType()); 609249259Sdim return true; 610249259Sdim } 611249259Sdim break; 612249259Sdim } 613327952Sdim case 'd': { 614327952Sdim if (Name == "dbg.value" && F->arg_size() == 4) { 615327952Sdim rename(F); 616327952Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); 617327952Sdim return true; 618327952Sdim } 619327952Sdim break; 620327952Sdim } 621353358Sdim case 'e': { 622353358Sdim SmallVector<StringRef, 2> Groups; 623360784Sdim static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); 624353358Sdim if (R.match(Name, &Groups)) { 625353358Sdim Intrinsic::ID ID = Intrinsic::not_intrinsic; 626353358Sdim if (Groups[1] == "fadd") 627353358Sdim ID = Intrinsic::experimental_vector_reduce_v2_fadd; 628353358Sdim if (Groups[1] == "fmul") 
629353358Sdim ID = Intrinsic::experimental_vector_reduce_v2_fmul; 630353358Sdim 631353358Sdim if (ID != Intrinsic::not_intrinsic) { 632353358Sdim rename(F); 633353358Sdim auto Args = F->getFunctionType()->params(); 634353358Sdim Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]}; 635353358Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys); 636353358Sdim return true; 637353358Sdim } 638353358Sdim } 639353358Sdim break; 640353358Sdim } 641321369Sdim case 'i': 642321369Sdim case 'l': { 643321369Sdim bool IsLifetimeStart = Name.startswith("lifetime.start"); 644321369Sdim if (IsLifetimeStart || Name.startswith("invariant.start")) { 645321369Sdim Intrinsic::ID ID = IsLifetimeStart ? 646321369Sdim Intrinsic::lifetime_start : Intrinsic::invariant_start; 647314564Sdim auto Args = F->getFunctionType()->params(); 648314564Sdim Type* ObjectPtr[1] = {Args[1]}; 649321369Sdim if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) { 650314564Sdim rename(F); 651321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); 652314564Sdim return true; 653314564Sdim } 654314564Sdim } 655321369Sdim 656321369Sdim bool IsLifetimeEnd = Name.startswith("lifetime.end"); 657321369Sdim if (IsLifetimeEnd || Name.startswith("invariant.end")) { 658321369Sdim Intrinsic::ID ID = IsLifetimeEnd ? 659321369Sdim Intrinsic::lifetime_end : Intrinsic::invariant_end; 660321369Sdim 661314564Sdim auto Args = F->getFunctionType()->params(); 662321369Sdim Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 
1 : 2]}; 663321369Sdim if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) { 664314564Sdim rename(F); 665321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); 666314564Sdim return true; 667314564Sdim } 668314564Sdim } 669341825Sdim if (Name.startswith("invariant.group.barrier")) { 670341825Sdim // Rename invariant.group.barrier to launder.invariant.group 671341825Sdim auto Args = F->getFunctionType()->params(); 672341825Sdim Type* ObjectPtr[1] = {Args[0]}; 673341825Sdim rename(F); 674341825Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 675341825Sdim Intrinsic::launder_invariant_group, ObjectPtr); 676341825Sdim return true; 677341825Sdim 678341825Sdim } 679341825Sdim 680314564Sdim break; 681314564Sdim } 682309124Sdim case 'm': { 683309124Sdim if (Name.startswith("masked.load.")) { 684309124Sdim Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; 685309124Sdim if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) { 686314564Sdim rename(F); 687309124Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 688309124Sdim Intrinsic::masked_load, 689309124Sdim Tys); 690309124Sdim return true; 691309124Sdim } 692309124Sdim } 693309124Sdim if (Name.startswith("masked.store.")) { 694309124Sdim auto Args = F->getFunctionType()->params(); 695309124Sdim Type *Tys[] = { Args[0], Args[1] }; 696309124Sdim if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) { 697314564Sdim rename(F); 698309124Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 699309124Sdim Intrinsic::masked_store, 700309124Sdim Tys); 701309124Sdim return true; 702309124Sdim } 703309124Sdim } 704321369Sdim // Renaming gather/scatter intrinsics with no address space overloading 705321369Sdim // to the new overload which includes an address space 706321369Sdim if (Name.startswith("masked.gather.")) { 707321369Sdim Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; 708321369Sdim if (F->getName() != 
Intrinsic::getName(Intrinsic::masked_gather, Tys)) { 709321369Sdim rename(F); 710321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 711321369Sdim Intrinsic::masked_gather, Tys); 712321369Sdim return true; 713321369Sdim } 714321369Sdim } 715321369Sdim if (Name.startswith("masked.scatter.")) { 716321369Sdim auto Args = F->getFunctionType()->params(); 717321369Sdim Type *Tys[] = {Args[0], Args[1]}; 718321369Sdim if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) { 719321369Sdim rename(F); 720321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), 721321369Sdim Intrinsic::masked_scatter, Tys); 722321369Sdim return true; 723321369Sdim } 724321369Sdim } 725341825Sdim // Updating the memory intrinsics (memcpy/memmove/memset) that have an 726341825Sdim // alignment parameter to embedding the alignment as an attribute of 727341825Sdim // the pointer args. 728341825Sdim if (Name.startswith("memcpy.") && F->arg_size() == 5) { 729341825Sdim rename(F); 730341825Sdim // Get the types of dest, src, and len 731341825Sdim ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3); 732341825Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy, 733341825Sdim ParamTypes); 734341825Sdim return true; 735341825Sdim } 736341825Sdim if (Name.startswith("memmove.") && F->arg_size() == 5) { 737341825Sdim rename(F); 738341825Sdim // Get the types of dest, src, and len 739341825Sdim ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3); 740341825Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove, 741341825Sdim ParamTypes); 742341825Sdim return true; 743341825Sdim } 744341825Sdim if (Name.startswith("memset.") && F->arg_size() == 5) { 745341825Sdim rename(F); 746341825Sdim // Get the types of dest, and len 747341825Sdim const auto *FT = F->getFunctionType(); 748341825Sdim Type *ParamTypes[2] = { 749341825Sdim FT->getParamType(0), // Dest 750341825Sdim FT->getParamType(2) // len 
751341825Sdim }; 752341825Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset, 753341825Sdim ParamTypes); 754341825Sdim return true; 755341825Sdim } 756309124Sdim break; 757309124Sdim } 758321369Sdim case 'n': { 759321369Sdim if (Name.startswith("nvvm.")) { 760321369Sdim Name = Name.substr(5); 761309124Sdim 762321369Sdim // The following nvvm intrinsics correspond exactly to an LLVM intrinsic. 763321369Sdim Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name) 764321369Sdim .Cases("brev32", "brev64", Intrinsic::bitreverse) 765321369Sdim .Case("clz.i", Intrinsic::ctlz) 766321369Sdim .Case("popc.i", Intrinsic::ctpop) 767321369Sdim .Default(Intrinsic::not_intrinsic); 768321369Sdim if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) { 769321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), IID, 770321369Sdim {F->getReturnType()}); 771321369Sdim return true; 772321369Sdim } 773321369Sdim 774321369Sdim // The following nvvm intrinsics correspond exactly to an LLVM idiom, but 775321369Sdim // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall. 776321369Sdim // 777321369Sdim // TODO: We could add lohi.i2d. 778321369Sdim bool Expand = StringSwitch<bool>(Name) 779321369Sdim .Cases("abs.i", "abs.ll", true) 780321369Sdim .Cases("clz.ll", "popc.ll", "h2f", true) 781321369Sdim .Cases("max.i", "max.ll", "max.ui", "max.ull", true) 782321369Sdim .Cases("min.i", "min.ll", "min.ui", "min.ull", true) 783353358Sdim .StartsWith("atomic.load.add.f32.p", true) 784353358Sdim .StartsWith("atomic.load.add.f64.p", true) 785321369Sdim .Default(false); 786321369Sdim if (Expand) { 787321369Sdim NewFn = nullptr; 788321369Sdim return true; 789321369Sdim } 790321369Sdim } 791321369Sdim break; 792321369Sdim } 793261991Sdim case 'o': 794261991Sdim // We only need to change the name to match the mangling including the 795261991Sdim // address space. 
796321369Sdim if (Name.startswith("objectsize.")) { 797261991Sdim Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; 798353358Sdim if (F->arg_size() == 2 || F->arg_size() == 3 || 799321369Sdim F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { 800314564Sdim rename(F); 801321369Sdim NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize, 802321369Sdim Tys); 803261991Sdim return true; 804261991Sdim } 805261991Sdim } 806261991Sdim break; 807261991Sdim 808360784Sdim case 'p': 809360784Sdim if (Name == "prefetch") { 810360784Sdim // Handle address space overloading. 811360784Sdim Type *Tys[] = {F->arg_begin()->getType()}; 812360784Sdim if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) { 813360784Sdim rename(F); 814360784Sdim NewFn = 815360784Sdim Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys); 816360784Sdim return true; 817360784Sdim } 818360784Sdim } 819360784Sdim break; 820360784Sdim 821309124Sdim case 's': 822309124Sdim if (Name == "stackprotectorcheck") { 823309124Sdim NewFn = nullptr; 824309124Sdim return true; 825309124Sdim } 826314564Sdim break; 827309124Sdim 828321369Sdim case 'x': 829321369Sdim if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) 830249259Sdim return true; 831249259Sdim } 832321369Sdim // Remangle our intrinsic since we upgrade the mangling 833321369Sdim auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); 834321369Sdim if (Result != None) { 835321369Sdim NewFn = Result.getValue(); 836321369Sdim return true; 837249259Sdim } 838249259Sdim 839249259Sdim // This may not belong here. This function is effectively being overloaded 840249259Sdim // to both detect an intrinsic which needs upgrading, and to provide the 841249259Sdim // upgraded form of the intrinsic. We should perhaps have two separate 842249259Sdim // functions for this. 
843249259Sdim return false; 844249259Sdim} 845249259Sdim 846249259Sdimbool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { 847276479Sdim NewFn = nullptr; 848249259Sdim bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); 849288943Sdim assert(F != NewFn && "Intrinsic function upgraded to the same function"); 850249259Sdim 851249259Sdim // Upgrade intrinsic attributes. This does not change the function. 852249259Sdim if (NewFn) 853249259Sdim F = NewFn; 854288943Sdim if (Intrinsic::ID id = F->getIntrinsicID()) 855288943Sdim F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); 856249259Sdim return Upgraded; 857249259Sdim} 858249259Sdim 859353358SdimGlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) { 860353358Sdim if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" || 861353358Sdim GV->getName() == "llvm.global_dtors")) || 862353358Sdim !GV->hasInitializer()) 863353358Sdim return nullptr; 864353358Sdim ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType()); 865353358Sdim if (!ATy) 866353358Sdim return nullptr; 867353358Sdim StructType *STy = dyn_cast<StructType>(ATy->getElementType()); 868353358Sdim if (!STy || STy->getNumElements() != 2) 869353358Sdim return nullptr; 870353358Sdim 871353358Sdim LLVMContext &C = GV->getContext(); 872353358Sdim IRBuilder<> IRB(C); 873353358Sdim auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1), 874353358Sdim IRB.getInt8PtrTy()); 875353358Sdim Constant *Init = GV->getInitializer(); 876353358Sdim unsigned N = Init->getNumOperands(); 877353358Sdim std::vector<Constant *> NewCtors(N); 878353358Sdim for (unsigned i = 0; i != N; ++i) { 879353358Sdim auto Ctor = cast<Constant>(Init->getOperand(i)); 880353358Sdim NewCtors[i] = ConstantStruct::get( 881353358Sdim EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1), 882353358Sdim Constant::getNullValue(IRB.getInt8PtrTy())); 883353358Sdim } 884353358Sdim Constant *NewInit = 
ConstantArray::get(ArrayType::get(EltTy, N), NewCtors); 885353358Sdim 886353358Sdim return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(), 887353358Sdim NewInit, GV->getName()); 888280031Sdim} 889276479Sdim 890309124Sdim// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them 891288943Sdim// to byte shuffles. 892309124Sdimstatic Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, 893309124Sdim Value *Op, unsigned Shift) { 894309124Sdim Type *ResultTy = Op->getType(); 895309124Sdim unsigned NumElts = ResultTy->getVectorNumElements() * 8; 896288943Sdim 897288943Sdim // Bitcast from a 64-bit element type to a byte element type. 898309124Sdim Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); 899309124Sdim Op = Builder.CreateBitCast(Op, VecTy, "cast"); 900309124Sdim 901288943Sdim // We'll be shuffling in zeroes. 902309124Sdim Value *Res = Constant::getNullValue(VecTy); 903288943Sdim 904288943Sdim // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 905288943Sdim // we'll just return the zero vector. 906288943Sdim if (Shift < 16) { 907309124Sdim uint32_t Idxs[64]; 908309124Sdim // 256/512-bit version is split into 2/4 16-byte lanes. 909288943Sdim for (unsigned l = 0; l != NumElts; l += 16) 910288943Sdim for (unsigned i = 0; i != 16; ++i) { 911288943Sdim unsigned Idx = NumElts + i - Shift; 912288943Sdim if (Idx < NumElts) 913288943Sdim Idx -= NumElts - 16; // end of lane, switch operand. 914309124Sdim Idxs[l + i] = Idx + l; 915288943Sdim } 916288943Sdim 917309124Sdim Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts)); 918288943Sdim } 919288943Sdim 920288943Sdim // Bitcast back to a 64-bit element type. 921309124Sdim return Builder.CreateBitCast(Res, ResultTy, "cast"); 922280031Sdim} 923276479Sdim 924309124Sdim// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them 925288943Sdim// to byte shuffles. 
926309124Sdimstatic Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, 927288943Sdim unsigned Shift) { 928309124Sdim Type *ResultTy = Op->getType(); 929309124Sdim unsigned NumElts = ResultTy->getVectorNumElements() * 8; 930288943Sdim 931288943Sdim // Bitcast from a 64-bit element type to a byte element type. 932309124Sdim Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); 933309124Sdim Op = Builder.CreateBitCast(Op, VecTy, "cast"); 934309124Sdim 935288943Sdim // We'll be shuffling in zeroes. 936309124Sdim Value *Res = Constant::getNullValue(VecTy); 937288943Sdim 938288943Sdim // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 939288943Sdim // we'll just return the zero vector. 940288943Sdim if (Shift < 16) { 941309124Sdim uint32_t Idxs[64]; 942309124Sdim // 256/512-bit version is split into 2/4 16-byte lanes. 943288943Sdim for (unsigned l = 0; l != NumElts; l += 16) 944288943Sdim for (unsigned i = 0; i != 16; ++i) { 945288943Sdim unsigned Idx = i + Shift; 946288943Sdim if (Idx >= 16) 947288943Sdim Idx += NumElts - 16; // end of lane, switch operand. 948309124Sdim Idxs[l + i] = Idx + l; 949288943Sdim } 950288943Sdim 951309124Sdim Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts)); 952276479Sdim } 953288943Sdim 954288943Sdim // Bitcast back to a 64-bit element type. 955309124Sdim return Builder.CreateBitCast(Res, ResultTy, "cast"); 956276479Sdim} 957276479Sdim 958309124Sdimstatic Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, 959309124Sdim unsigned NumElts) { 960309124Sdim llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), 961309124Sdim cast<IntegerType>(Mask->getType())->getBitWidth()); 962309124Sdim Mask = Builder.CreateBitCast(Mask, MaskTy); 963309124Sdim 964309124Sdim // If we have less than 8 elements, then the starting mask was an i8 and 965309124Sdim // we need to extract down to the right number of elements. 
966309124Sdim if (NumElts < 8) { 967309124Sdim uint32_t Indices[4]; 968309124Sdim for (unsigned i = 0; i != NumElts; ++i) 969309124Sdim Indices[i] = i; 970309124Sdim Mask = Builder.CreateShuffleVector(Mask, Mask, 971309124Sdim makeArrayRef(Indices, NumElts), 972309124Sdim "extract"); 973309124Sdim } 974309124Sdim 975309124Sdim return Mask; 976309124Sdim} 977309124Sdim 978309124Sdimstatic Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, 979309124Sdim Value *Op0, Value *Op1) { 980341825Sdim // If the mask is all ones just emit the first operation. 981309124Sdim if (const auto *C = dyn_cast<Constant>(Mask)) 982309124Sdim if (C->isAllOnesValue()) 983309124Sdim return Op0; 984309124Sdim 985309124Sdim Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); 986309124Sdim return Builder.CreateSelect(Mask, Op0, Op1); 987309124Sdim} 988309124Sdim 989341825Sdimstatic Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, 990341825Sdim Value *Op0, Value *Op1) { 991341825Sdim // If the mask is all ones just emit the first operation. 992341825Sdim if (const auto *C = dyn_cast<Constant>(Mask)) 993341825Sdim if (C->isAllOnesValue()) 994341825Sdim return Op0; 995341825Sdim 996341825Sdim llvm::VectorType *MaskTy = 997341825Sdim llvm::VectorType::get(Builder.getInt1Ty(), 998341825Sdim Mask->getType()->getIntegerBitWidth()); 999341825Sdim Mask = Builder.CreateBitCast(Mask, MaskTy); 1000341825Sdim Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); 1001341825Sdim return Builder.CreateSelect(Mask, Op0, Op1); 1002341825Sdim} 1003341825Sdim 1004314564Sdim// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. 1005314564Sdim// PALIGNR handles large immediates by shifting while VALIGN masks the immediate 1006314564Sdim// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes. 
1007314564Sdimstatic Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, 1008314564Sdim Value *Op1, Value *Shift, 1009314564Sdim Value *Passthru, Value *Mask, 1010314564Sdim bool IsVALIGN) { 1011309124Sdim unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue(); 1012309124Sdim 1013309124Sdim unsigned NumElts = Op0->getType()->getVectorNumElements(); 1014314564Sdim assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!"); 1015314564Sdim assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!"); 1016314564Sdim assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!"); 1017309124Sdim 1018314564Sdim // Mask the immediate for VALIGN. 1019314564Sdim if (IsVALIGN) 1020314564Sdim ShiftVal &= (NumElts - 1); 1021314564Sdim 1022309124Sdim // If palignr is shifting the pair of vectors more than the size of two 1023309124Sdim // lanes, emit zero. 1024309124Sdim if (ShiftVal >= 32) 1025309124Sdim return llvm::Constant::getNullValue(Op0->getType()); 1026309124Sdim 1027309124Sdim // If palignr is shifting the pair of input vectors more than one lane, 1028309124Sdim // but less than two lanes, convert to shifting in zeroes. 1029309124Sdim if (ShiftVal > 16) { 1030309124Sdim ShiftVal -= 16; 1031309124Sdim Op1 = Op0; 1032309124Sdim Op0 = llvm::Constant::getNullValue(Op0->getType()); 1033309124Sdim } 1034309124Sdim 1035309124Sdim uint32_t Indices[64]; 1036309124Sdim // 256-bit palignr operates on 128-bit lanes so we need to handle that 1037314564Sdim for (unsigned l = 0; l < NumElts; l += 16) { 1038309124Sdim for (unsigned i = 0; i != 16; ++i) { 1039309124Sdim unsigned Idx = ShiftVal + i; 1040314564Sdim if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN. 1041309124Sdim Idx += NumElts - 16; // End of lane, switch operand. 
1042309124Sdim Indices[l + i] = Idx + l; 1043309124Sdim } 1044309124Sdim } 1045309124Sdim 1046309124Sdim Value *Align = Builder.CreateShuffleVector(Op1, Op0, 1047309124Sdim makeArrayRef(Indices, NumElts), 1048309124Sdim "palignr"); 1049309124Sdim 1050309124Sdim return EmitX86Select(Builder, Mask, Align, Passthru); 1051309124Sdim} 1052309124Sdim 1053344779Sdimstatic Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, 1054344779Sdim bool ZeroMask, bool IndexForm) { 1055344779Sdim Type *Ty = CI.getType(); 1056344779Sdim unsigned VecWidth = Ty->getPrimitiveSizeInBits(); 1057344779Sdim unsigned EltWidth = Ty->getScalarSizeInBits(); 1058344779Sdim bool IsFloat = Ty->isFPOrFPVectorTy(); 1059344779Sdim Intrinsic::ID IID; 1060344779Sdim if (VecWidth == 128 && EltWidth == 32 && IsFloat) 1061344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_ps_128; 1062344779Sdim else if (VecWidth == 128 && EltWidth == 32 && !IsFloat) 1063344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_d_128; 1064344779Sdim else if (VecWidth == 128 && EltWidth == 64 && IsFloat) 1065344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_pd_128; 1066344779Sdim else if (VecWidth == 128 && EltWidth == 64 && !IsFloat) 1067344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_q_128; 1068344779Sdim else if (VecWidth == 256 && EltWidth == 32 && IsFloat) 1069344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_ps_256; 1070344779Sdim else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) 1071344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_d_256; 1072344779Sdim else if (VecWidth == 256 && EltWidth == 64 && IsFloat) 1073344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_pd_256; 1074344779Sdim else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) 1075344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_q_256; 1076344779Sdim else if (VecWidth == 512 && EltWidth == 32 && IsFloat) 1077344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_ps_512; 1078344779Sdim else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) 
1079344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_d_512; 1080344779Sdim else if (VecWidth == 512 && EltWidth == 64 && IsFloat) 1081344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_pd_512; 1082344779Sdim else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) 1083344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_q_512; 1084344779Sdim else if (VecWidth == 128 && EltWidth == 16) 1085344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_hi_128; 1086344779Sdim else if (VecWidth == 256 && EltWidth == 16) 1087344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_hi_256; 1088344779Sdim else if (VecWidth == 512 && EltWidth == 16) 1089344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_hi_512; 1090344779Sdim else if (VecWidth == 128 && EltWidth == 8) 1091344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_qi_128; 1092344779Sdim else if (VecWidth == 256 && EltWidth == 8) 1093344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_qi_256; 1094344779Sdim else if (VecWidth == 512 && EltWidth == 8) 1095344779Sdim IID = Intrinsic::x86_avx512_vpermi2var_qi_512; 1096344779Sdim else 1097344779Sdim llvm_unreachable("Unexpected intrinsic"); 1098344779Sdim 1099344779Sdim Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1), 1100344779Sdim CI.getArgOperand(2) }; 1101344779Sdim 1102344779Sdim // If this isn't index form we need to swap operand 0 and 1. 1103344779Sdim if (!IndexForm) 1104344779Sdim std::swap(Args[0], Args[1]); 1105344779Sdim 1106344779Sdim Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID), 1107344779Sdim Args); 1108344779Sdim Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(Ty) 1109344779Sdim : Builder.CreateBitCast(CI.getArgOperand(1), 1110344779Sdim Ty); 1111344779Sdim return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru); 1112344779Sdim} 1113344779Sdim 1114344779Sdimstatic Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, 1115344779Sdim bool IsSigned, bool IsAddition) { 1116344779Sdim Type *Ty = CI.getType(); 1117344779Sdim Value *Op0 = CI.getOperand(0); 1118344779Sdim Value *Op1 = CI.getOperand(1); 1119344779Sdim 1120344779Sdim Intrinsic::ID IID = 1121344779Sdim IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) 1122344779Sdim : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); 1123344779Sdim Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); 1124344779Sdim Value *Res = Builder.CreateCall(Intrin, {Op0, Op1}); 1125344779Sdim 1126344779Sdim if (CI.getNumArgOperands() == 4) { // For masked intrinsics. 1127344779Sdim Value *VecSrc = CI.getOperand(2); 1128344779Sdim Value *Mask = CI.getOperand(3); 1129344779Sdim Res = EmitX86Select(Builder, Mask, Res, VecSrc); 1130344779Sdim } 1131344779Sdim return Res; 1132344779Sdim} 1133344779Sdim 1134344779Sdimstatic Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, 1135344779Sdim bool IsRotateRight) { 1136344779Sdim Type *Ty = CI.getType(); 1137344779Sdim Value *Src = CI.getArgOperand(0); 1138344779Sdim Value *Amt = CI.getArgOperand(1); 1139344779Sdim 1140344779Sdim // Amount may be scalar immediate, in which case create a splat vector. 1141344779Sdim // Funnel shifts amounts are treated as modulo and types are all power-of-2 so 1142344779Sdim // we only care about the lowest log2 bits anyway. 
1143344779Sdim if (Amt->getType() != Ty) { 1144344779Sdim unsigned NumElts = Ty->getVectorNumElements(); 1145344779Sdim Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false); 1146344779Sdim Amt = Builder.CreateVectorSplat(NumElts, Amt); 1147344779Sdim } 1148344779Sdim 1149344779Sdim Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; 1150344779Sdim Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); 1151344779Sdim Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt}); 1152344779Sdim 1153344779Sdim if (CI.getNumArgOperands() == 4) { // For masked intrinsics. 1154344779Sdim Value *VecSrc = CI.getOperand(2); 1155344779Sdim Value *Mask = CI.getOperand(3); 1156344779Sdim Res = EmitX86Select(Builder, Mask, Res, VecSrc); 1157344779Sdim } 1158344779Sdim return Res; 1159344779Sdim} 1160344779Sdim 1161353358Sdimstatic Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm, 1162353358Sdim bool IsSigned) { 1163353358Sdim Type *Ty = CI.getType(); 1164353358Sdim Value *LHS = CI.getArgOperand(0); 1165353358Sdim Value *RHS = CI.getArgOperand(1); 1166353358Sdim 1167353358Sdim CmpInst::Predicate Pred; 1168353358Sdim switch (Imm) { 1169353358Sdim case 0x0: 1170353358Sdim Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; 1171353358Sdim break; 1172353358Sdim case 0x1: 1173353358Sdim Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; 1174353358Sdim break; 1175353358Sdim case 0x2: 1176353358Sdim Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; 1177353358Sdim break; 1178353358Sdim case 0x3: 1179353358Sdim Pred = IsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; 1180353358Sdim break; 1181353358Sdim case 0x4: 1182353358Sdim Pred = ICmpInst::ICMP_EQ; 1183353358Sdim break; 1184353358Sdim case 0x5: 1185353358Sdim Pred = ICmpInst::ICMP_NE; 1186353358Sdim break; 1187353358Sdim case 0x6: 1188353358Sdim return Constant::getNullValue(Ty); // FALSE 1189353358Sdim case 0x7: 1190353358Sdim return Constant::getAllOnesValue(Ty); // TRUE 1191353358Sdim default: 1192353358Sdim llvm_unreachable("Unknown XOP vpcom/vpcomu predicate"); 1193353358Sdim } 1194353358Sdim 1195353358Sdim Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS); 1196353358Sdim Value *Ext = Builder.CreateSExt(Cmp, Ty); 1197353358Sdim return Ext; 1198353358Sdim} 1199353358Sdim 1200344779Sdimstatic Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI, 1201344779Sdim bool IsShiftRight, bool ZeroMask) { 1202344779Sdim Type *Ty = CI.getType(); 1203344779Sdim Value *Op0 = CI.getArgOperand(0); 1204344779Sdim Value *Op1 = CI.getArgOperand(1); 1205344779Sdim Value *Amt = CI.getArgOperand(2); 1206344779Sdim 1207344779Sdim if (IsShiftRight) 1208344779Sdim std::swap(Op0, Op1); 1209344779Sdim 1210344779Sdim // Amount may be scalar immediate, in which case create a splat vector. 1211344779Sdim // Funnel shifts amounts are treated as modulo and types are all power-of-2 so 1212344779Sdim // we only care about the lowest log2 bits anyway. 1213344779Sdim if (Amt->getType() != Ty) { 1214344779Sdim unsigned NumElts = Ty->getVectorNumElements(); 1215344779Sdim Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false); 1216344779Sdim Amt = Builder.CreateVectorSplat(NumElts, Amt); 1217344779Sdim } 1218344779Sdim 1219344779Sdim Intrinsic::ID IID = IsShiftRight ? 
Intrinsic::fshr : Intrinsic::fshl; 1220344779Sdim Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); 1221344779Sdim Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt}); 1222344779Sdim 1223344779Sdim unsigned NumArgs = CI.getNumArgOperands(); 1224344779Sdim if (NumArgs >= 4) { // For masked intrinsics. 1225344779Sdim Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) : 1226344779Sdim ZeroMask ? ConstantAggregateZero::get(CI.getType()) : 1227344779Sdim CI.getArgOperand(0); 1228344779Sdim Value *Mask = CI.getOperand(NumArgs - 1); 1229344779Sdim Res = EmitX86Select(Builder, Mask, Res, VecSrc); 1230344779Sdim } 1231344779Sdim return Res; 1232344779Sdim} 1233344779Sdim

/// Lower a legacy x86 masked-store intrinsic to generic IR.
///
/// \p Ptr is bitcast to a pointer to the type of \p Data. When \p Aligned is
/// set, the alignment is the byte width of the data vector; otherwise it is 1.
/// A constant all-ones \p Mask produces an ordinary aligned store. Any other
/// mask is converted from its integer form to a vector of i1 and a masked
/// store is emitted.
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  Type *DataTy = Data->getType();
  Value *TypedPtr =
      Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(DataTy));

  unsigned Align = 1;
  if (Aligned)
    Align = cast<VectorType>(DataTy)->getBitWidth() / 8;

  // Every lane is written, so no mask is needed.
  const auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask && ConstMask->isAllOnesValue())
    return Builder.CreateAlignedStore(Data, TypedPtr, Align);

  Value *LaneMask =
      getX86MaskVec(Builder, Mask, DataTy->getVectorNumElements());
  return Builder.CreateMaskedStore(Data, TypedPtr, Align, LaneMask);
}

/// Lower a legacy x86 masked-load intrinsic to generic IR.
///
/// The loaded type is the type of \p Passthru, and \p Ptr is bitcast to a
/// pointer to it. When \p Aligned is set, the alignment is the byte width of
/// that vector; otherwise it is 1. A constant all-ones \p Mask produces an
/// ordinary aligned load. Any other mask is converted to a vector of i1 and a
/// masked load with \p Passthru as the pass-through value is emitted.
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  Type *ValTy = Passthru->getType();
  Value *TypedPtr =
      Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));

  unsigned Align = 1;
  if (Aligned)
    Align = cast<VectorType>(ValTy)->getBitWidth() / 8;

  // Every lane is read, so a regular load is sufficient.
  const auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask && ConstMask->isAllOnesValue())
    return Builder.CreateAlignedLoad(ValTy, TypedPtr, Align);

  Value *LaneMask =
      getX86MaskVec(Builder, Mask, ValTy->getVectorNumElements());
  return Builder.CreateMaskedLoad(TypedPtr, Align, LaneMask, Passthru);
}

/// Expand an integer abs intrinsic into "x > 0 ? x : -x".
///
/// When the call has three arguments, the result is additionally blended via
/// EmitX86Select using argument 2 as the mask and argument 1 as the
/// pass-through value.
static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Value *Src = CI.getArgOperand(0);
  Value *IsPositive = Builder.CreateICmp(
      ICmpInst::ICMP_SGT, Src, llvm::Constant::getNullValue(Src->getType()));
  Value *Negated = Builder.CreateNeg(Src);
  Value *Abs = Builder.CreateSelect(IsPositive, Src, Negated);

  if (CI.getNumArgOperands() != 3)
    return Abs;
  return EmitX86Select(Builder, CI.getArgOperand(2), Abs, CI.getArgOperand(1));
}

/// Expand an integer min/max intrinsic into an icmp with \p Pred followed by
/// a select between the two operands.
///
/// When the call has four arguments, the result is additionally blended via
/// EmitX86Select using argument 3 as the mask and argument 2 as the
/// pass-through value.
static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *A = CI.getArgOperand(0);
  Value *B = CI.getArgOperand(1);
  Value *PickA = Builder.CreateICmp(Pred, A, B);
  Value *MinMax = Builder.CreateSelect(PickA, A, B);

  if (CI.getNumArgOperands() != 4)
    return MinMax;
  return EmitX86Select(Builder, CI.getArgOperand(3), MinMax,
                       CI.getArgOperand(2));
}

/// Expand pmuldq / pmuludq into a plain multiply on the call's result type.
///
/// Both operands are bitcast from their vXi32 type to the vXi64 result type.
/// For \p IsSigned the low 32 bits of each element are sign-extended in place
/// (shl by 32, then ashr by 32); otherwise the upper 32 bits are cleared.
/// When the call has four arguments, the product is additionally blended via
/// EmitX86Select using argument 3 as the mask and argument 2 as the
/// pass-through value.
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  Type *WideTy = CI.getType();

  Value *A = Builder.CreateBitCast(CI.getArgOperand(0), WideTy);
  Value *B = Builder.CreateBitCast(CI.getArgOperand(1), WideTy);

  if (!IsSigned) {
    // Keep only the low 32 bits of every element.
    Constant *LowBits = ConstantInt::get(WideTy, 0xffffffff);
    A = Builder.CreateAnd(A, LowBits);
    B = Builder.CreateAnd(B, LowBits);
  } else {
    // Sign-extend the low 32 bits of every element.
    Constant *Shift = ConstantInt::get(WideTy, 32);
    A = Builder.CreateShl(A, Shift);
    A = Builder.CreateAShr(A, Shift);
    B = Builder.CreateShl(B, Shift);
    B = Builder.CreateAShr(B, Shift);
  }

  Value *Product = Builder.CreateMul(A, B);

  if (CI.getNumArgOperands() != 4)
    return Product;
  return EmitX86Select(Builder, CI.getArgOperand(3), Product,
                       CI.getArgOperand(2));
}

// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1331341825Sdimstatic Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, 1332341825Sdim Value *Mask) { 1333341825Sdim unsigned NumElts = Vec->getType()->getVectorNumElements(); 1334341825Sdim if (Mask) { 1335341825Sdim const auto *C = dyn_cast<Constant>(Mask); 1336341825Sdim if (!C || !C->isAllOnesValue()) 1337341825Sdim Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); 1338341825Sdim } 1339327952Sdim 1340327952Sdim if (NumElts < 8) { 1341327952Sdim uint32_t Indices[8]; 1342327952Sdim for (unsigned i = 0; i != NumElts; ++i) 1343327952Sdim Indices[i] = i; 1344327952Sdim for (unsigned i = NumElts; i != 8; ++i) 1345327952Sdim Indices[i] = NumElts + i % NumElts; 1346327952Sdim Vec = Builder.CreateShuffleVector(Vec, 1347327952Sdim Constant::getNullValue(Vec->getType()), 1348327952Sdim Indices); 1349327952Sdim } 1350327952Sdim return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U))); 1351327952Sdim} 1352327952Sdim 1353309124Sdimstatic Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, 1354321369Sdim unsigned CC, bool Signed) { 1355309124Sdim Value *Op0 = CI.getArgOperand(0); 1356309124Sdim unsigned NumElts = Op0->getType()->getVectorNumElements(); 1357309124Sdim 1358321369Sdim Value *Cmp; 1359321369Sdim if (CC == 3) { 1360321369Sdim Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); 1361321369Sdim } else if (CC == 7) { 1362321369Sdim Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); 1363321369Sdim } else { 1364321369Sdim ICmpInst::Predicate Pred; 1365321369Sdim switch (CC) { 1366321369Sdim default: llvm_unreachable("Unknown condition code"); 1367321369Sdim case 0: Pred = ICmpInst::ICMP_EQ; break; 1368321369Sdim case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 1369321369Sdim case 2: Pred = Signed ? 
ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 1370321369Sdim case 4: Pred = ICmpInst::ICMP_NE; break; 1371321369Sdim case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 1372321369Sdim case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 1373321369Sdim } 1374321369Sdim Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); 1375321369Sdim } 1376321369Sdim 1377321369Sdim Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); 1378309124Sdim 1379341825Sdim return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask); 1380309124Sdim} 1381309124Sdim 1382314564Sdim// Replace a masked intrinsic with an older unmasked intrinsic. 1383314564Sdimstatic Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, 1384314564Sdim Intrinsic::ID IID) { 1385341825Sdim Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID); 1386314564Sdim Value *Rep = Builder.CreateCall(Intrin, 1387314564Sdim { CI.getArgOperand(0), CI.getArgOperand(1) }); 1388314564Sdim return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); 1389314564Sdim} 1390314564Sdim 1391314564Sdimstatic Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) { 1392314564Sdim Value* A = CI.getArgOperand(0); 1393314564Sdim Value* B = CI.getArgOperand(1); 1394314564Sdim Value* Src = CI.getArgOperand(2); 1395314564Sdim Value* Mask = CI.getArgOperand(3); 1396314564Sdim 1397314564Sdim Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1)); 1398314564Sdim Value* Cmp = Builder.CreateIsNotNull(AndNode); 1399314564Sdim Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0); 1400314564Sdim Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0); 1401314564Sdim Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2); 1402314564Sdim return Builder.CreateInsertElement(A, Select, (uint64_t)0); 1403314564Sdim} 1404314564Sdim 1405321369Sdim 1406321369Sdimstatic Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) { 1407321369Sdim Value* 
Op = CI.getArgOperand(0); 1408321369Sdim Type* ReturnOp = CI.getType(); 1409321369Sdim unsigned NumElts = CI.getType()->getVectorNumElements(); 1410321369Sdim Value *Mask = getX86MaskVec(Builder, Op, NumElts); 1411321369Sdim return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2"); 1412321369Sdim} 1413321369Sdim 1414341825Sdim// Replace intrinsic with unmasked version and a select. 1415341825Sdimstatic bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, 1416341825Sdim CallInst &CI, Value *&Rep) { 1417341825Sdim Name = Name.substr(12); // Remove avx512.mask. 1418341825Sdim 1419341825Sdim unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); 1420341825Sdim unsigned EltWidth = CI.getType()->getScalarSizeInBits(); 1421341825Sdim Intrinsic::ID IID; 1422341825Sdim if (Name.startswith("max.p")) { 1423341825Sdim if (VecWidth == 128 && EltWidth == 32) 1424341825Sdim IID = Intrinsic::x86_sse_max_ps; 1425341825Sdim else if (VecWidth == 128 && EltWidth == 64) 1426341825Sdim IID = Intrinsic::x86_sse2_max_pd; 1427341825Sdim else if (VecWidth == 256 && EltWidth == 32) 1428341825Sdim IID = Intrinsic::x86_avx_max_ps_256; 1429341825Sdim else if (VecWidth == 256 && EltWidth == 64) 1430341825Sdim IID = Intrinsic::x86_avx_max_pd_256; 1431341825Sdim else 1432341825Sdim llvm_unreachable("Unexpected intrinsic"); 1433341825Sdim } else if (Name.startswith("min.p")) { 1434341825Sdim if (VecWidth == 128 && EltWidth == 32) 1435341825Sdim IID = Intrinsic::x86_sse_min_ps; 1436341825Sdim else if (VecWidth == 128 && EltWidth == 64) 1437341825Sdim IID = Intrinsic::x86_sse2_min_pd; 1438341825Sdim else if (VecWidth == 256 && EltWidth == 32) 1439341825Sdim IID = Intrinsic::x86_avx_min_ps_256; 1440341825Sdim else if (VecWidth == 256 && EltWidth == 64) 1441341825Sdim IID = Intrinsic::x86_avx_min_pd_256; 1442341825Sdim else 1443341825Sdim llvm_unreachable("Unexpected intrinsic"); 1444341825Sdim } else if (Name.startswith("pshuf.b.")) { 1445341825Sdim if (VecWidth == 128) 1446341825Sdim 
IID = Intrinsic::x86_ssse3_pshuf_b_128; 1447341825Sdim else if (VecWidth == 256) 1448341825Sdim IID = Intrinsic::x86_avx2_pshuf_b; 1449341825Sdim else if (VecWidth == 512) 1450341825Sdim IID = Intrinsic::x86_avx512_pshuf_b_512; 1451341825Sdim else 1452341825Sdim llvm_unreachable("Unexpected intrinsic"); 1453341825Sdim } else if (Name.startswith("pmul.hr.sw.")) { 1454341825Sdim if (VecWidth == 128) 1455341825Sdim IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; 1456341825Sdim else if (VecWidth == 256) 1457341825Sdim IID = Intrinsic::x86_avx2_pmul_hr_sw; 1458341825Sdim else if (VecWidth == 512) 1459341825Sdim IID = Intrinsic::x86_avx512_pmul_hr_sw_512; 1460341825Sdim else 1461341825Sdim llvm_unreachable("Unexpected intrinsic"); 1462341825Sdim } else if (Name.startswith("pmulh.w.")) { 1463341825Sdim if (VecWidth == 128) 1464341825Sdim IID = Intrinsic::x86_sse2_pmulh_w; 1465341825Sdim else if (VecWidth == 256) 1466341825Sdim IID = Intrinsic::x86_avx2_pmulh_w; 1467341825Sdim else if (VecWidth == 512) 1468341825Sdim IID = Intrinsic::x86_avx512_pmulh_w_512; 1469341825Sdim else 1470341825Sdim llvm_unreachable("Unexpected intrinsic"); 1471341825Sdim } else if (Name.startswith("pmulhu.w.")) { 1472341825Sdim if (VecWidth == 128) 1473341825Sdim IID = Intrinsic::x86_sse2_pmulhu_w; 1474341825Sdim else if (VecWidth == 256) 1475341825Sdim IID = Intrinsic::x86_avx2_pmulhu_w; 1476341825Sdim else if (VecWidth == 512) 1477341825Sdim IID = Intrinsic::x86_avx512_pmulhu_w_512; 1478341825Sdim else 1479341825Sdim llvm_unreachable("Unexpected intrinsic"); 1480341825Sdim } else if (Name.startswith("pmaddw.d.")) { 1481341825Sdim if (VecWidth == 128) 1482341825Sdim IID = Intrinsic::x86_sse2_pmadd_wd; 1483341825Sdim else if (VecWidth == 256) 1484341825Sdim IID = Intrinsic::x86_avx2_pmadd_wd; 1485341825Sdim else if (VecWidth == 512) 1486341825Sdim IID = Intrinsic::x86_avx512_pmaddw_d_512; 1487341825Sdim else 1488341825Sdim llvm_unreachable("Unexpected intrinsic"); 1489341825Sdim } else if 
(Name.startswith("pmaddubs.w.")) { 1490341825Sdim if (VecWidth == 128) 1491341825Sdim IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; 1492341825Sdim else if (VecWidth == 256) 1493341825Sdim IID = Intrinsic::x86_avx2_pmadd_ub_sw; 1494341825Sdim else if (VecWidth == 512) 1495341825Sdim IID = Intrinsic::x86_avx512_pmaddubs_w_512; 1496341825Sdim else 1497341825Sdim llvm_unreachable("Unexpected intrinsic"); 1498341825Sdim } else if (Name.startswith("packsswb.")) { 1499341825Sdim if (VecWidth == 128) 1500341825Sdim IID = Intrinsic::x86_sse2_packsswb_128; 1501341825Sdim else if (VecWidth == 256) 1502341825Sdim IID = Intrinsic::x86_avx2_packsswb; 1503341825Sdim else if (VecWidth == 512) 1504341825Sdim IID = Intrinsic::x86_avx512_packsswb_512; 1505341825Sdim else 1506341825Sdim llvm_unreachable("Unexpected intrinsic"); 1507341825Sdim } else if (Name.startswith("packssdw.")) { 1508341825Sdim if (VecWidth == 128) 1509341825Sdim IID = Intrinsic::x86_sse2_packssdw_128; 1510341825Sdim else if (VecWidth == 256) 1511341825Sdim IID = Intrinsic::x86_avx2_packssdw; 1512341825Sdim else if (VecWidth == 512) 1513341825Sdim IID = Intrinsic::x86_avx512_packssdw_512; 1514341825Sdim else 1515341825Sdim llvm_unreachable("Unexpected intrinsic"); 1516341825Sdim } else if (Name.startswith("packuswb.")) { 1517341825Sdim if (VecWidth == 128) 1518341825Sdim IID = Intrinsic::x86_sse2_packuswb_128; 1519341825Sdim else if (VecWidth == 256) 1520341825Sdim IID = Intrinsic::x86_avx2_packuswb; 1521341825Sdim else if (VecWidth == 512) 1522341825Sdim IID = Intrinsic::x86_avx512_packuswb_512; 1523341825Sdim else 1524341825Sdim llvm_unreachable("Unexpected intrinsic"); 1525341825Sdim } else if (Name.startswith("packusdw.")) { 1526341825Sdim if (VecWidth == 128) 1527341825Sdim IID = Intrinsic::x86_sse41_packusdw; 1528341825Sdim else if (VecWidth == 256) 1529341825Sdim IID = Intrinsic::x86_avx2_packusdw; 1530341825Sdim else if (VecWidth == 512) 1531341825Sdim IID = Intrinsic::x86_avx512_packusdw_512; 
1532341825Sdim else 1533341825Sdim llvm_unreachable("Unexpected intrinsic"); 1534341825Sdim } else if (Name.startswith("vpermilvar.")) { 1535341825Sdim if (VecWidth == 128 && EltWidth == 32) 1536341825Sdim IID = Intrinsic::x86_avx_vpermilvar_ps; 1537341825Sdim else if (VecWidth == 128 && EltWidth == 64) 1538341825Sdim IID = Intrinsic::x86_avx_vpermilvar_pd; 1539341825Sdim else if (VecWidth == 256 && EltWidth == 32) 1540341825Sdim IID = Intrinsic::x86_avx_vpermilvar_ps_256; 1541341825Sdim else if (VecWidth == 256 && EltWidth == 64) 1542341825Sdim IID = Intrinsic::x86_avx_vpermilvar_pd_256; 1543341825Sdim else if (VecWidth == 512 && EltWidth == 32) 1544341825Sdim IID = Intrinsic::x86_avx512_vpermilvar_ps_512; 1545341825Sdim else if (VecWidth == 512 && EltWidth == 64) 1546341825Sdim IID = Intrinsic::x86_avx512_vpermilvar_pd_512; 1547341825Sdim else 1548341825Sdim llvm_unreachable("Unexpected intrinsic"); 1549341825Sdim } else if (Name == "cvtpd2dq.256") { 1550341825Sdim IID = Intrinsic::x86_avx_cvt_pd2dq_256; 1551341825Sdim } else if (Name == "cvtpd2ps.256") { 1552341825Sdim IID = Intrinsic::x86_avx_cvt_pd2_ps_256; 1553341825Sdim } else if (Name == "cvttpd2dq.256") { 1554341825Sdim IID = Intrinsic::x86_avx_cvtt_pd2dq_256; 1555341825Sdim } else if (Name == "cvttps2dq.128") { 1556341825Sdim IID = Intrinsic::x86_sse2_cvttps2dq; 1557341825Sdim } else if (Name == "cvttps2dq.256") { 1558341825Sdim IID = Intrinsic::x86_avx_cvtt_ps2dq_256; 1559341825Sdim } else if (Name.startswith("permvar.")) { 1560341825Sdim bool IsFloat = CI.getType()->isFPOrFPVectorTy(); 1561341825Sdim if (VecWidth == 256 && EltWidth == 32 && IsFloat) 1562341825Sdim IID = Intrinsic::x86_avx2_permps; 1563341825Sdim else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) 1564341825Sdim IID = Intrinsic::x86_avx2_permd; 1565341825Sdim else if (VecWidth == 256 && EltWidth == 64 && IsFloat) 1566341825Sdim IID = Intrinsic::x86_avx512_permvar_df_256; 1567341825Sdim else if (VecWidth == 256 && EltWidth == 64 && 
!IsFloat) 1568341825Sdim IID = Intrinsic::x86_avx512_permvar_di_256; 1569341825Sdim else if (VecWidth == 512 && EltWidth == 32 && IsFloat) 1570341825Sdim IID = Intrinsic::x86_avx512_permvar_sf_512; 1571341825Sdim else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) 1572341825Sdim IID = Intrinsic::x86_avx512_permvar_si_512; 1573341825Sdim else if (VecWidth == 512 && EltWidth == 64 && IsFloat) 1574341825Sdim IID = Intrinsic::x86_avx512_permvar_df_512; 1575341825Sdim else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) 1576341825Sdim IID = Intrinsic::x86_avx512_permvar_di_512; 1577341825Sdim else if (VecWidth == 128 && EltWidth == 16) 1578341825Sdim IID = Intrinsic::x86_avx512_permvar_hi_128; 1579341825Sdim else if (VecWidth == 256 && EltWidth == 16) 1580341825Sdim IID = Intrinsic::x86_avx512_permvar_hi_256; 1581341825Sdim else if (VecWidth == 512 && EltWidth == 16) 1582341825Sdim IID = Intrinsic::x86_avx512_permvar_hi_512; 1583341825Sdim else if (VecWidth == 128 && EltWidth == 8) 1584341825Sdim IID = Intrinsic::x86_avx512_permvar_qi_128; 1585341825Sdim else if (VecWidth == 256 && EltWidth == 8) 1586341825Sdim IID = Intrinsic::x86_avx512_permvar_qi_256; 1587341825Sdim else if (VecWidth == 512 && EltWidth == 8) 1588341825Sdim IID = Intrinsic::x86_avx512_permvar_qi_512; 1589341825Sdim else 1590341825Sdim llvm_unreachable("Unexpected intrinsic"); 1591341825Sdim } else if (Name.startswith("dbpsadbw.")) { 1592341825Sdim if (VecWidth == 128) 1593341825Sdim IID = Intrinsic::x86_avx512_dbpsadbw_128; 1594341825Sdim else if (VecWidth == 256) 1595341825Sdim IID = Intrinsic::x86_avx512_dbpsadbw_256; 1596341825Sdim else if (VecWidth == 512) 1597341825Sdim IID = Intrinsic::x86_avx512_dbpsadbw_512; 1598341825Sdim else 1599341825Sdim llvm_unreachable("Unexpected intrinsic"); 1600344779Sdim } else if (Name.startswith("pmultishift.qb.")) { 1601344779Sdim if (VecWidth == 128) 1602344779Sdim IID = Intrinsic::x86_avx512_pmultishift_qb_128; 1603344779Sdim else if (VecWidth == 256) 
1604344779Sdim IID = Intrinsic::x86_avx512_pmultishift_qb_256; 1605344779Sdim else if (VecWidth == 512) 1606344779Sdim IID = Intrinsic::x86_avx512_pmultishift_qb_512; 1607341825Sdim else 1608341825Sdim llvm_unreachable("Unexpected intrinsic"); 1609353358Sdim } else if (Name.startswith("conflict.")) { 1610353358Sdim if (Name[9] == 'd' && VecWidth == 128) 1611353358Sdim IID = Intrinsic::x86_avx512_conflict_d_128; 1612353358Sdim else if (Name[9] == 'd' && VecWidth == 256) 1613353358Sdim IID = Intrinsic::x86_avx512_conflict_d_256; 1614353358Sdim else if (Name[9] == 'd' && VecWidth == 512) 1615353358Sdim IID = Intrinsic::x86_avx512_conflict_d_512; 1616353358Sdim else if (Name[9] == 'q' && VecWidth == 128) 1617353358Sdim IID = Intrinsic::x86_avx512_conflict_q_128; 1618353358Sdim else if (Name[9] == 'q' && VecWidth == 256) 1619353358Sdim IID = Intrinsic::x86_avx512_conflict_q_256; 1620353358Sdim else if (Name[9] == 'q' && VecWidth == 512) 1621353358Sdim IID = Intrinsic::x86_avx512_conflict_q_512; 1622353358Sdim else 1623353358Sdim llvm_unreachable("Unexpected intrinsic"); 1624353358Sdim } else if (Name.startswith("pavg.")) { 1625353358Sdim if (Name[5] == 'b' && VecWidth == 128) 1626353358Sdim IID = Intrinsic::x86_sse2_pavg_b; 1627353358Sdim else if (Name[5] == 'b' && VecWidth == 256) 1628353358Sdim IID = Intrinsic::x86_avx2_pavg_b; 1629353358Sdim else if (Name[5] == 'b' && VecWidth == 512) 1630353358Sdim IID = Intrinsic::x86_avx512_pavg_b_512; 1631353358Sdim else if (Name[5] == 'w' && VecWidth == 128) 1632353358Sdim IID = Intrinsic::x86_sse2_pavg_w; 1633353358Sdim else if (Name[5] == 'w' && VecWidth == 256) 1634353358Sdim IID = Intrinsic::x86_avx2_pavg_w; 1635353358Sdim else if (Name[5] == 'w' && VecWidth == 512) 1636353358Sdim IID = Intrinsic::x86_avx512_pavg_w_512; 1637353358Sdim else 1638353358Sdim llvm_unreachable("Unexpected intrinsic"); 1639341825Sdim } else 1640341825Sdim return false; 1641341825Sdim 1642341825Sdim SmallVector<Value *, 4> 
Args(CI.arg_operands().begin(), 1643341825Sdim CI.arg_operands().end()); 1644341825Sdim Args.pop_back(); 1645341825Sdim Args.pop_back(); 1646341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID), 1647341825Sdim Args); 1648341825Sdim unsigned NumArgs = CI.getNumArgOperands(); 1649341825Sdim Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep, 1650341825Sdim CI.getArgOperand(NumArgs - 2)); 1651341825Sdim return true; 1652341825Sdim} 1653341825Sdim 1654341825Sdim/// Upgrade comment in call to inline asm that represents an objc retain release 1655341825Sdim/// marker. 1656341825Sdimvoid llvm::UpgradeInlineAsmString(std::string *AsmStr) { 1657341825Sdim size_t Pos; 1658341825Sdim if (AsmStr->find("mov\tfp") == 0 && 1659341825Sdim AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos && 1660341825Sdim (Pos = AsmStr->find("# marker")) != std::string::npos) { 1661341825Sdim AsmStr->replace(Pos, 1, ";"); 1662341825Sdim } 1663341825Sdim return; 1664341825Sdim} 1665341825Sdim 1666309124Sdim/// Upgrade a call to an old intrinsic. All argument and return casting must be 1667309124Sdim/// provided to seamlessly integrate with existing context. 1668249259Sdimvoid llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 1669249259Sdim Function *F = CI->getCalledFunction(); 1670249259Sdim LLVMContext &C = CI->getContext(); 1671249259Sdim IRBuilder<> Builder(C); 1672296417Sdim Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 1673249259Sdim 1674249259Sdim assert(F && "Intrinsic call is not direct?"); 1675249259Sdim 1676249259Sdim if (!NewFn) { 1677249259Sdim // Get the Function's name. 
1678249259Sdim StringRef Name = F->getName(); 1679249259Sdim 1680309124Sdim assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'"); 1681309124Sdim Name = Name.substr(5); 1682309124Sdim 1683309124Sdim bool IsX86 = Name.startswith("x86."); 1684309124Sdim if (IsX86) 1685309124Sdim Name = Name.substr(4); 1686321369Sdim bool IsNVVM = Name.startswith("nvvm."); 1687321369Sdim if (IsNVVM) 1688321369Sdim Name = Name.substr(5); 1689309124Sdim 1690314564Sdim if (IsX86 && Name.startswith("sse4a.movnt.")) { 1691249259Sdim Module *M = F->getParent(); 1692280031Sdim SmallVector<Metadata *, 1> Elts; 1693280031Sdim Elts.push_back( 1694280031Sdim ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 1695249259Sdim MDNode *Node = MDNode::get(C, Elts); 1696249259Sdim 1697249259Sdim Value *Arg0 = CI->getArgOperand(0); 1698249259Sdim Value *Arg1 = CI->getArgOperand(1); 1699249259Sdim 1700309124Sdim // Nontemporal (unaligned) store of the 0'th element of the float/double 1701309124Sdim // vector. 1702309124Sdim Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType(); 1703309124Sdim PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy); 1704309124Sdim Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast"); 1705309124Sdim Value *Extract = 1706309124Sdim Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement"); 1707309124Sdim 1708309124Sdim StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1); 1709309124Sdim SI->setMetadata(M->getMDKindID("nontemporal"), Node); 1710309124Sdim 1711309124Sdim // Remove intrinsic. 
1712309124Sdim CI->eraseFromParent(); 1713309124Sdim return; 1714314564Sdim } 1715314564Sdim 1716314564Sdim if (IsX86 && (Name.startswith("avx.movnt.") || 1717314564Sdim Name.startswith("avx512.storent."))) { 1718309124Sdim Module *M = F->getParent(); 1719309124Sdim SmallVector<Metadata *, 1> Elts; 1720309124Sdim Elts.push_back( 1721309124Sdim ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 1722309124Sdim MDNode *Node = MDNode::get(C, Elts); 1723309124Sdim 1724309124Sdim Value *Arg0 = CI->getArgOperand(0); 1725309124Sdim Value *Arg1 = CI->getArgOperand(1); 1726309124Sdim 1727249259Sdim // Convert the type of the pointer to a pointer to the stored type. 1728249259Sdim Value *BC = Builder.CreateBitCast(Arg0, 1729249259Sdim PointerType::getUnqual(Arg1->getType()), 1730249259Sdim "cast"); 1731309124Sdim VectorType *VTy = cast<VectorType>(Arg1->getType()); 1732309124Sdim StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, 1733309124Sdim VTy->getBitWidth() / 8); 1734249259Sdim SI->setMetadata(M->getMDKindID("nontemporal"), Node); 1735249259Sdim 1736249259Sdim // Remove intrinsic. 1737249259Sdim CI->eraseFromParent(); 1738249259Sdim return; 1739314564Sdim } 1740314564Sdim 1741314564Sdim if (IsX86 && Name == "sse2.storel.dq") { 1742309124Sdim Value *Arg0 = CI->getArgOperand(0); 1743309124Sdim Value *Arg1 = CI->getArgOperand(1); 1744309124Sdim 1745309124Sdim Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 1746309124Sdim Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 1747309124Sdim Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0); 1748309124Sdim Value *BC = Builder.CreateBitCast(Arg0, 1749309124Sdim PointerType::getUnqual(Elt->getType()), 1750309124Sdim "cast"); 1751309124Sdim Builder.CreateAlignedStore(Elt, BC, 1); 1752309124Sdim 1753309124Sdim // Remove intrinsic. 
1754309124Sdim CI->eraseFromParent(); 1755309124Sdim return; 1756314564Sdim } 1757314564Sdim 1758314564Sdim if (IsX86 && (Name.startswith("sse.storeu.") || 1759314564Sdim Name.startswith("sse2.storeu.") || 1760314564Sdim Name.startswith("avx.storeu."))) { 1761309124Sdim Value *Arg0 = CI->getArgOperand(0); 1762309124Sdim Value *Arg1 = CI->getArgOperand(1); 1763309124Sdim 1764309124Sdim Arg0 = Builder.CreateBitCast(Arg0, 1765309124Sdim PointerType::getUnqual(Arg1->getType()), 1766309124Sdim "cast"); 1767309124Sdim Builder.CreateAlignedStore(Arg1, Arg0, 1); 1768309124Sdim 1769309124Sdim // Remove intrinsic. 1770309124Sdim CI->eraseFromParent(); 1771309124Sdim return; 1772314564Sdim } 1773314564Sdim 1774341825Sdim if (IsX86 && Name == "avx512.mask.store.ss") { 1775341825Sdim Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1)); 1776341825Sdim UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), 1777341825Sdim Mask, false); 1778341825Sdim 1779341825Sdim // Remove intrinsic. 1780341825Sdim CI->eraseFromParent(); 1781341825Sdim return; 1782341825Sdim } 1783341825Sdim 1784321369Sdim if (IsX86 && (Name.startswith("avx512.mask.store"))) { 1785321369Sdim // "avx512.mask.storeu." or "avx512.mask.store." 1786321369Sdim bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". 1787309124Sdim UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), 1788321369Sdim CI->getArgOperand(2), Aligned); 1789309124Sdim 1790309124Sdim // Remove intrinsic. 1791309124Sdim CI->eraseFromParent(); 1792309124Sdim return; 1793314564Sdim } 1794314564Sdim 1795314564Sdim Value *Rep; 1796314564Sdim // Upgrade packed integer vector compare intrinsics to compare instructions. 1797321369Sdim if (IsX86 && (Name.startswith("sse2.pcmp") || 1798321369Sdim Name.startswith("avx2.pcmp"))) { 1799321369Sdim // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt." 
1800321369Sdim bool CmpEq = Name[9] == 'e'; 1801321369Sdim Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, 1802321369Sdim CI->getArgOperand(0), CI->getArgOperand(1)); 1803314564Sdim Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 1804327952Sdim } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) { 1805327952Sdim Type *ExtTy = Type::getInt32Ty(C); 1806327952Sdim if (CI->getOperand(0)->getType()->isIntegerTy(8)) 1807327952Sdim ExtTy = Type::getInt64Ty(C); 1808327952Sdim unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 1809327952Sdim ExtTy->getPrimitiveSizeInBits(); 1810327952Sdim Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); 1811327952Sdim Rep = Builder.CreateVectorSplat(NumElts, Rep); 1812341825Sdim } else if (IsX86 && (Name == "sse.sqrt.ss" || 1813341825Sdim Name == "sse2.sqrt.sd")) { 1814341825Sdim Value *Vec = CI->getArgOperand(0); 1815341825Sdim Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0); 1816341825Sdim Function *Intr = Intrinsic::getDeclaration(F->getParent(), 1817341825Sdim Intrinsic::sqrt, Elt0->getType()); 1818341825Sdim Elt0 = Builder.CreateCall(Intr, Elt0); 1819341825Sdim Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0); 1820341825Sdim } else if (IsX86 && (Name.startswith("avx.sqrt.p") || 1821341825Sdim Name.startswith("sse2.sqrt.p") || 1822341825Sdim Name.startswith("sse.sqrt.p"))) { 1823341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 1824341825Sdim Intrinsic::sqrt, 1825341825Sdim CI->getType()), 1826341825Sdim {CI->getArgOperand(0)}); 1827341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) { 1828341825Sdim if (CI->getNumArgOperands() == 4 && 1829341825Sdim (!isa<ConstantInt>(CI->getArgOperand(3)) || 1830341825Sdim cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) { 1831341825Sdim Intrinsic::ID IID = Name[18] == 's' ? 
Intrinsic::x86_avx512_sqrt_ps_512 1832341825Sdim : Intrinsic::x86_avx512_sqrt_pd_512; 1833341825Sdim 1834341825Sdim Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) }; 1835341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), 1836341825Sdim IID), Args); 1837341825Sdim } else { 1838341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 1839341825Sdim Intrinsic::sqrt, 1840341825Sdim CI->getType()), 1841341825Sdim {CI->getArgOperand(0)}); 1842341825Sdim } 1843341825Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1844341825Sdim CI->getArgOperand(1)); 1845327952Sdim } else if (IsX86 && (Name.startswith("avx512.ptestm") || 1846327952Sdim Name.startswith("avx512.ptestnm"))) { 1847327952Sdim Value *Op0 = CI->getArgOperand(0); 1848327952Sdim Value *Op1 = CI->getArgOperand(1); 1849327952Sdim Value *Mask = CI->getArgOperand(2); 1850327952Sdim Rep = Builder.CreateAnd(Op0, Op1); 1851327952Sdim llvm::Type *Ty = Op0->getType(); 1852327952Sdim Value *Zero = llvm::Constant::getNullValue(Ty); 1853327952Sdim ICmpInst::Predicate Pred = 1854327952Sdim Name.startswith("avx512.ptestm") ? 
ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1855327952Sdim Rep = Builder.CreateICmp(Pred, Rep, Zero); 1856341825Sdim Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask); 1857327952Sdim } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ 1858327952Sdim unsigned NumElts = 1859327952Sdim CI->getArgOperand(1)->getType()->getVectorNumElements(); 1860327952Sdim Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); 1861327952Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1862327952Sdim CI->getArgOperand(1)); 1863341825Sdim } else if (IsX86 && (Name.startswith("avx512.kunpck"))) { 1864341825Sdim unsigned NumElts = CI->getType()->getScalarSizeInBits(); 1865341825Sdim Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts); 1866341825Sdim Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts); 1867341825Sdim uint32_t Indices[64]; 1868341825Sdim for (unsigned i = 0; i != NumElts; ++i) 1869341825Sdim Indices[i] = i; 1870341825Sdim 1871341825Sdim // First extract half of each vector. This gives better codegen than 1872341825Sdim // doing it in a single shuffle. 1873341825Sdim LHS = Builder.CreateShuffleVector(LHS, LHS, 1874341825Sdim makeArrayRef(Indices, NumElts / 2)); 1875341825Sdim RHS = Builder.CreateShuffleVector(RHS, RHS, 1876341825Sdim makeArrayRef(Indices, NumElts / 2)); 1877341825Sdim // Concat the vectors. 1878341825Sdim // NOTE: Operands have to be swapped to match intrinsic definition. 
1879341825Sdim Rep = Builder.CreateShuffleVector(RHS, LHS, 1880341825Sdim makeArrayRef(Indices, NumElts)); 1881341825Sdim Rep = Builder.CreateBitCast(Rep, CI->getType()); 1882341825Sdim } else if (IsX86 && Name == "avx512.kand.w") { 1883341825Sdim Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1884341825Sdim Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1885341825Sdim Rep = Builder.CreateAnd(LHS, RHS); 1886341825Sdim Rep = Builder.CreateBitCast(Rep, CI->getType()); 1887341825Sdim } else if (IsX86 && Name == "avx512.kandn.w") { 1888341825Sdim Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1889341825Sdim Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1890341825Sdim LHS = Builder.CreateNot(LHS); 1891341825Sdim Rep = Builder.CreateAnd(LHS, RHS); 1892341825Sdim Rep = Builder.CreateBitCast(Rep, CI->getType()); 1893341825Sdim } else if (IsX86 && Name == "avx512.kor.w") { 1894341825Sdim Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1895341825Sdim Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1896341825Sdim Rep = Builder.CreateOr(LHS, RHS); 1897341825Sdim Rep = Builder.CreateBitCast(Rep, CI->getType()); 1898341825Sdim } else if (IsX86 && Name == "avx512.kxor.w") { 1899341825Sdim Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1900341825Sdim Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1901341825Sdim Rep = Builder.CreateXor(LHS, RHS); 1902341825Sdim Rep = Builder.CreateBitCast(Rep, CI->getType()); 1903341825Sdim } else if (IsX86 && Name == "avx512.kxnor.w") { 1904341825Sdim Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1905341825Sdim Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1906341825Sdim LHS = Builder.CreateNot(LHS); 1907341825Sdim Rep = Builder.CreateXor(LHS, RHS); 1908341825Sdim Rep = Builder.CreateBitCast(Rep, CI->getType()); 1909341825Sdim } else if (IsX86 && Name == "avx512.knot.w") { 1910341825Sdim 
Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1911341825Sdim Rep = Builder.CreateNot(Rep); 1912341825Sdim Rep = Builder.CreateBitCast(Rep, CI->getType()); 1913341825Sdim } else if (IsX86 && 1914341825Sdim (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) { 1915341825Sdim Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1916341825Sdim Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1917341825Sdim Rep = Builder.CreateOr(LHS, RHS); 1918341825Sdim Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty()); 1919341825Sdim Value *C; 1920341825Sdim if (Name[14] == 'c') 1921341825Sdim C = ConstantInt::getAllOnesValue(Builder.getInt16Ty()); 1922341825Sdim else 1923341825Sdim C = ConstantInt::getNullValue(Builder.getInt16Ty()); 1924341825Sdim Rep = Builder.CreateICmpEQ(Rep, C); 1925341825Sdim Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty()); 1926344779Sdim } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" || 1927344779Sdim Name == "sse.sub.ss" || Name == "sse2.sub.sd" || 1928344779Sdim Name == "sse.mul.ss" || Name == "sse2.mul.sd" || 1929344779Sdim Name == "sse.div.ss" || Name == "sse2.div.sd")) { 1930314564Sdim Type *I32Ty = Type::getInt32Ty(C); 1931314564Sdim Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), 1932314564Sdim ConstantInt::get(I32Ty, 0)); 1933314564Sdim Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), 1934314564Sdim ConstantInt::get(I32Ty, 0)); 1935344779Sdim Value *EltOp; 1936344779Sdim if (Name.contains(".add.")) 1937344779Sdim EltOp = Builder.CreateFAdd(Elt0, Elt1); 1938344779Sdim else if (Name.contains(".sub.")) 1939344779Sdim EltOp = Builder.CreateFSub(Elt0, Elt1); 1940344779Sdim else if (Name.contains(".mul.")) 1941344779Sdim EltOp = Builder.CreateFMul(Elt0, Elt1); 1942344779Sdim else 1943344779Sdim EltOp = Builder.CreateFDiv(Elt0, Elt1); 1944344779Sdim Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp, 1945314564Sdim ConstantInt::get(I32Ty, 0)); 
1946321369Sdim } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) { 1947321369Sdim // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." 1948321369Sdim bool CmpEq = Name[16] == 'e'; 1949321369Sdim Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true); 1950344779Sdim } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) { 1951344779Sdim Type *OpTy = CI->getArgOperand(0)->getType(); 1952344779Sdim unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); 1953344779Sdim Intrinsic::ID IID; 1954344779Sdim switch (VecWidth) { 1955344779Sdim default: llvm_unreachable("Unexpected intrinsic"); 1956344779Sdim case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break; 1957344779Sdim case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break; 1958344779Sdim case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break; 1959344779Sdim } 1960344779Sdim 1961344779Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1962344779Sdim { CI->getOperand(0), CI->getArgOperand(1) }); 1963344779Sdim Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); 1964341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) { 1965341825Sdim Type *OpTy = CI->getArgOperand(0)->getType(); 1966341825Sdim unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); 1967341825Sdim unsigned EltWidth = OpTy->getScalarSizeInBits(); 1968341825Sdim Intrinsic::ID IID; 1969341825Sdim if (VecWidth == 128 && EltWidth == 32) 1970341825Sdim IID = Intrinsic::x86_avx512_fpclass_ps_128; 1971341825Sdim else if (VecWidth == 256 && EltWidth == 32) 1972341825Sdim IID = Intrinsic::x86_avx512_fpclass_ps_256; 1973341825Sdim else if (VecWidth == 512 && EltWidth == 32) 1974341825Sdim IID = Intrinsic::x86_avx512_fpclass_ps_512; 1975341825Sdim else if (VecWidth == 128 && EltWidth == 64) 1976341825Sdim IID = Intrinsic::x86_avx512_fpclass_pd_128; 1977341825Sdim else if (VecWidth == 256 && EltWidth == 64) 1978341825Sdim IID = Intrinsic::x86_avx512_fpclass_pd_256; 
1979341825Sdim else if (VecWidth == 512 && EltWidth == 64) 1980341825Sdim IID = Intrinsic::x86_avx512_fpclass_pd_512; 1981341825Sdim else 1982341825Sdim llvm_unreachable("Unexpected intrinsic"); 1983341825Sdim 1984341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1985341825Sdim { CI->getOperand(0), CI->getArgOperand(1) }); 1986341825Sdim Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); 1987341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) { 1988341825Sdim Type *OpTy = CI->getArgOperand(0)->getType(); 1989341825Sdim unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); 1990341825Sdim unsigned EltWidth = OpTy->getScalarSizeInBits(); 1991341825Sdim Intrinsic::ID IID; 1992341825Sdim if (VecWidth == 128 && EltWidth == 32) 1993341825Sdim IID = Intrinsic::x86_avx512_cmp_ps_128; 1994341825Sdim else if (VecWidth == 256 && EltWidth == 32) 1995341825Sdim IID = Intrinsic::x86_avx512_cmp_ps_256; 1996341825Sdim else if (VecWidth == 512 && EltWidth == 32) 1997341825Sdim IID = Intrinsic::x86_avx512_cmp_ps_512; 1998341825Sdim else if (VecWidth == 128 && EltWidth == 64) 1999341825Sdim IID = Intrinsic::x86_avx512_cmp_pd_128; 2000341825Sdim else if (VecWidth == 256 && EltWidth == 64) 2001341825Sdim IID = Intrinsic::x86_avx512_cmp_pd_256; 2002341825Sdim else if (VecWidth == 512 && EltWidth == 64) 2003341825Sdim IID = Intrinsic::x86_avx512_cmp_pd_512; 2004341825Sdim else 2005341825Sdim llvm_unreachable("Unexpected intrinsic"); 2006341825Sdim 2007341825Sdim SmallVector<Value *, 4> Args; 2008341825Sdim Args.push_back(CI->getArgOperand(0)); 2009341825Sdim Args.push_back(CI->getArgOperand(1)); 2010341825Sdim Args.push_back(CI->getArgOperand(2)); 2011341825Sdim if (CI->getNumArgOperands() == 5) 2012341825Sdim Args.push_back(CI->getArgOperand(4)); 2013341825Sdim 2014341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2015341825Sdim Args); 2016341825Sdim Rep = ApplyX86MaskOn1BitsVec(Builder, 
Rep, CI->getArgOperand(3)); 2017341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.cmp.") && 2018341825Sdim Name[16] != 'p') { 2019341825Sdim // Integer compare intrinsics. 2020321369Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2021321369Sdim Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); 2022341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) { 2023321369Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2024321369Sdim Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); 2025341825Sdim } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || 2026341825Sdim Name.startswith("avx512.cvtw2mask.") || 2027341825Sdim Name.startswith("avx512.cvtd2mask.") || 2028341825Sdim Name.startswith("avx512.cvtq2mask."))) { 2029341825Sdim Value *Op = CI->getArgOperand(0); 2030341825Sdim Value *Zero = llvm::Constant::getNullValue(Op->getType()); 2031341825Sdim Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); 2032341825Sdim Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr); 2033327952Sdim } else if(IsX86 && (Name == "ssse3.pabs.b.128" || 2034327952Sdim Name == "ssse3.pabs.w.128" || 2035327952Sdim Name == "ssse3.pabs.d.128" || 2036327952Sdim Name.startswith("avx2.pabs") || 2037327952Sdim Name.startswith("avx512.mask.pabs"))) { 2038327952Sdim Rep = upgradeAbs(Builder, *CI); 2039314564Sdim } else if (IsX86 && (Name == "sse41.pmaxsb" || 2040314564Sdim Name == "sse2.pmaxs.w" || 2041314564Sdim Name == "sse41.pmaxsd" || 2042314564Sdim Name.startswith("avx2.pmaxs") || 2043314564Sdim Name.startswith("avx512.mask.pmaxs"))) { 2044314564Sdim Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT); 2045314564Sdim } else if (IsX86 && (Name == "sse2.pmaxu.b" || 2046314564Sdim Name == "sse41.pmaxuw" || 2047314564Sdim Name == "sse41.pmaxud" || 2048314564Sdim Name.startswith("avx2.pmaxu") || 2049314564Sdim Name.startswith("avx512.mask.pmaxu"))) { 2050314564Sdim Rep = upgradeIntMinMax(Builder, *CI, 
ICmpInst::ICMP_UGT); 2051314564Sdim } else if (IsX86 && (Name == "sse41.pminsb" || 2052314564Sdim Name == "sse2.pmins.w" || 2053314564Sdim Name == "sse41.pminsd" || 2054314564Sdim Name.startswith("avx2.pmins") || 2055314564Sdim Name.startswith("avx512.mask.pmins"))) { 2056314564Sdim Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT); 2057314564Sdim } else if (IsX86 && (Name == "sse2.pminu.b" || 2058314564Sdim Name == "sse41.pminuw" || 2059314564Sdim Name == "sse41.pminud" || 2060314564Sdim Name.startswith("avx2.pminu") || 2061314564Sdim Name.startswith("avx512.mask.pminu"))) { 2062314564Sdim Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); 2063341825Sdim } else if (IsX86 && (Name == "sse2.pmulu.dq" || 2064341825Sdim Name == "avx2.pmulu.dq" || 2065341825Sdim Name == "avx512.pmulu.dq.512" || 2066341825Sdim Name.startswith("avx512.mask.pmulu.dq."))) { 2067341825Sdim Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false); 2068341825Sdim } else if (IsX86 && (Name == "sse41.pmuldq" || 2069341825Sdim Name == "avx2.pmul.dq" || 2070341825Sdim Name == "avx512.pmul.dq.512" || 2071341825Sdim Name.startswith("avx512.mask.pmul.dq."))) { 2072341825Sdim Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true); 2073341825Sdim } else if (IsX86 && (Name == "sse.cvtsi2ss" || 2074341825Sdim Name == "sse2.cvtsi2sd" || 2075341825Sdim Name == "sse.cvtsi642ss" || 2076341825Sdim Name == "sse2.cvtsi642sd")) { 2077341825Sdim Rep = Builder.CreateSIToFP(CI->getArgOperand(1), 2078341825Sdim CI->getType()->getVectorElementType()); 2079341825Sdim Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 2080341825Sdim } else if (IsX86 && Name == "avx512.cvtusi2sd") { 2081341825Sdim Rep = Builder.CreateUIToFP(CI->getArgOperand(1), 2082341825Sdim CI->getType()->getVectorElementType()); 2083341825Sdim Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 2084341825Sdim } else if (IsX86 && Name == "sse2.cvtss2sd") { 2085341825Sdim Rep = 
Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0); 2086341825Sdim Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType()); 2087341825Sdim Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 2088314564Sdim } else if (IsX86 && (Name == "sse2.cvtdq2pd" || 2089341825Sdim Name == "sse2.cvtdq2ps" || 2090341825Sdim Name == "avx.cvtdq2.pd.256" || 2091341825Sdim Name == "avx.cvtdq2.ps.256" || 2092341825Sdim Name.startswith("avx512.mask.cvtdq2pd.") || 2093341825Sdim Name.startswith("avx512.mask.cvtudq2pd.") || 2094353358Sdim Name.startswith("avx512.mask.cvtdq2ps.") || 2095353358Sdim Name.startswith("avx512.mask.cvtudq2ps.") || 2096353358Sdim Name.startswith("avx512.mask.cvtqq2pd.") || 2097353358Sdim Name.startswith("avx512.mask.cvtuqq2pd.") || 2098353358Sdim Name == "avx512.mask.cvtqq2ps.256" || 2099353358Sdim Name == "avx512.mask.cvtqq2ps.512" || 2100353358Sdim Name == "avx512.mask.cvtuqq2ps.256" || 2101353358Sdim Name == "avx512.mask.cvtuqq2ps.512" || 2102314564Sdim Name == "sse2.cvtps2pd" || 2103314564Sdim Name == "avx.cvt.ps2.pd.256" || 2104341825Sdim Name == "avx512.mask.cvtps2pd.128" || 2105341825Sdim Name == "avx512.mask.cvtps2pd.256")) { 2106341825Sdim Type *DstTy = CI->getType(); 2107314564Sdim Rep = CI->getArgOperand(0); 2108353358Sdim Type *SrcTy = Rep->getType(); 2109314564Sdim 2110341825Sdim unsigned NumDstElts = DstTy->getVectorNumElements(); 2111353358Sdim if (NumDstElts < SrcTy->getVectorNumElements()) { 2112314564Sdim assert(NumDstElts == 2 && "Unexpected vector size"); 2113314564Sdim uint32_t ShuffleMask[2] = { 0, 1 }; 2114341825Sdim Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask); 2115314564Sdim } 2116314564Sdim 2117353358Sdim bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy(); 2118341825Sdim bool IsUnsigned = (StringRef::npos != Name.find("cvtu")); 2119341825Sdim if (IsPS2PD) 2120341825Sdim Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); 2121353358Sdim else if (CI->getNumArgOperands() 
== 4 && 2122353358Sdim (!isa<ConstantInt>(CI->getArgOperand(3)) || 2123353358Sdim cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) { 2124353358Sdim Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round 2125353358Sdim : Intrinsic::x86_avx512_sitofp_round; 2126353358Sdim Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, 2127353358Sdim { DstTy, SrcTy }); 2128353358Sdim Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) }); 2129353358Sdim } else { 2130353358Sdim Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt") 2131353358Sdim : Builder.CreateSIToFP(Rep, DstTy, "cvt"); 2132353358Sdim } 2133314564Sdim 2134353358Sdim if (CI->getNumArgOperands() >= 3) 2135314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2136314564Sdim CI->getArgOperand(1)); 2137314564Sdim } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) { 2138309124Sdim Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), 2139309124Sdim CI->getArgOperand(1), CI->getArgOperand(2), 2140309124Sdim /*Aligned*/false); 2141314564Sdim } else if (IsX86 && (Name.startswith("avx512.mask.load."))) { 2142309124Sdim Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), 2143309124Sdim CI->getArgOperand(1),CI->getArgOperand(2), 2144309124Sdim /*Aligned*/true); 2145341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) { 2146341825Sdim Type *ResultTy = CI->getType(); 2147341825Sdim Type *PtrTy = ResultTy->getVectorElementType(); 2148341825Sdim 2149341825Sdim // Cast the pointer to element type. 
2150341825Sdim Value *Ptr = Builder.CreateBitCast(CI->getOperand(0), 2151341825Sdim llvm::PointerType::getUnqual(PtrTy)); 2152341825Sdim 2153341825Sdim Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), 2154341825Sdim ResultTy->getVectorNumElements()); 2155341825Sdim 2156341825Sdim Function *ELd = Intrinsic::getDeclaration(F->getParent(), 2157341825Sdim Intrinsic::masked_expandload, 2158341825Sdim ResultTy); 2159341825Sdim Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) }); 2160341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) { 2161341825Sdim Type *ResultTy = CI->getArgOperand(1)->getType(); 2162341825Sdim Type *PtrTy = ResultTy->getVectorElementType(); 2163341825Sdim 2164341825Sdim // Cast the pointer to element type. 2165341825Sdim Value *Ptr = Builder.CreateBitCast(CI->getOperand(0), 2166341825Sdim llvm::PointerType::getUnqual(PtrTy)); 2167341825Sdim 2168341825Sdim Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), 2169341825Sdim ResultTy->getVectorNumElements()); 2170341825Sdim 2171341825Sdim Function *CSt = Intrinsic::getDeclaration(F->getParent(), 2172341825Sdim Intrinsic::masked_compressstore, 2173341825Sdim ResultTy); 2174341825Sdim Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec }); 2175353358Sdim } else if (IsX86 && (Name.startswith("avx512.mask.compress.") || 2176353358Sdim Name.startswith("avx512.mask.expand."))) { 2177353358Sdim Type *ResultTy = CI->getType(); 2178353358Sdim 2179353358Sdim Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), 2180353358Sdim ResultTy->getVectorNumElements()); 2181353358Sdim 2182353358Sdim bool IsCompress = Name[12] == 'c'; 2183353358Sdim Intrinsic::ID IID = IsCompress ? 
Intrinsic::x86_avx512_mask_compress 2184353358Sdim : Intrinsic::x86_avx512_mask_expand; 2185353358Sdim Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy); 2186353358Sdim Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1), 2187353358Sdim MaskVec }); 2188309124Sdim } else if (IsX86 && Name.startswith("xop.vpcom")) { 2189353358Sdim bool IsSigned; 2190353358Sdim if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") || 2191353358Sdim Name.endswith("uq")) 2192353358Sdim IsSigned = false; 2193353358Sdim else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") || 2194353358Sdim Name.endswith("q")) 2195353358Sdim IsSigned = true; 2196249259Sdim else 2197249259Sdim llvm_unreachable("Unknown suffix"); 2198249259Sdim 2199249259Sdim unsigned Imm; 2200353358Sdim if (CI->getNumArgOperands() == 3) { 2201353358Sdim Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2202353358Sdim } else { 2203353358Sdim Name = Name.substr(9); // strip off "xop.vpcom" 2204353358Sdim if (Name.startswith("lt")) 2205353358Sdim Imm = 0; 2206353358Sdim else if (Name.startswith("le")) 2207353358Sdim Imm = 1; 2208353358Sdim else if (Name.startswith("gt")) 2209353358Sdim Imm = 2; 2210353358Sdim else if (Name.startswith("ge")) 2211353358Sdim Imm = 3; 2212353358Sdim else if (Name.startswith("eq")) 2213353358Sdim Imm = 4; 2214353358Sdim else if (Name.startswith("ne")) 2215353358Sdim Imm = 5; 2216353358Sdim else if (Name.startswith("false")) 2217353358Sdim Imm = 6; 2218353358Sdim else if (Name.startswith("true")) 2219353358Sdim Imm = 7; 2220353358Sdim else 2221353358Sdim llvm_unreachable("Unknown condition"); 2222353358Sdim } 2223249259Sdim 2224353358Sdim Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned); 2225321369Sdim } else if (IsX86 && Name.startswith("xop.vpcmov")) { 2226296417Sdim Value *Sel = CI->getArgOperand(2); 2227321369Sdim Value *NotSel = Builder.CreateNot(Sel); 2228321369Sdim Value *Sel0 = 
Builder.CreateAnd(CI->getArgOperand(0), Sel); 2229321369Sdim Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); 2230296417Sdim Rep = Builder.CreateOr(Sel0, Sel1); 2231344779Sdim } else if (IsX86 && (Name.startswith("xop.vprot") || 2232344779Sdim Name.startswith("avx512.prol") || 2233344779Sdim Name.startswith("avx512.mask.prol"))) { 2234344779Sdim Rep = upgradeX86Rotate(Builder, *CI, false); 2235344779Sdim } else if (IsX86 && (Name.startswith("avx512.pror") || 2236344779Sdim Name.startswith("avx512.mask.pror"))) { 2237344779Sdim Rep = upgradeX86Rotate(Builder, *CI, true); 2238344779Sdim } else if (IsX86 && (Name.startswith("avx512.vpshld.") || 2239344779Sdim Name.startswith("avx512.mask.vpshld") || 2240344779Sdim Name.startswith("avx512.maskz.vpshld"))) { 2241344779Sdim bool ZeroMask = Name[11] == 'z'; 2242344779Sdim Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask); 2243344779Sdim } else if (IsX86 && (Name.startswith("avx512.vpshrd.") || 2244344779Sdim Name.startswith("avx512.mask.vpshrd") || 2245344779Sdim Name.startswith("avx512.maskz.vpshrd"))) { 2246344779Sdim bool ZeroMask = Name[11] == 'z'; 2247344779Sdim Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask); 2248309124Sdim } else if (IsX86 && Name == "sse42.crc32.64.8") { 2249261991Sdim Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 2250261991Sdim Intrinsic::x86_sse42_crc32_32_8); 2251261991Sdim Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 2252288943Sdim Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 2253261991Sdim Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 2254341825Sdim } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") || 2255341825Sdim Name.startswith("avx512.vbroadcast.s"))) { 2256276479Sdim // Replace broadcasts with a series of insertelements. 
2257276479Sdim Type *VecTy = CI->getType(); 2258276479Sdim Type *EltTy = VecTy->getVectorElementType(); 2259276479Sdim unsigned EltNum = VecTy->getVectorNumElements(); 2260276479Sdim Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 2261276479Sdim EltTy->getPointerTo()); 2262288943Sdim Value *Load = Builder.CreateLoad(EltTy, Cast); 2263276479Sdim Type *I32Ty = Type::getInt32Ty(C); 2264276479Sdim Rep = UndefValue::get(VecTy); 2265276479Sdim for (unsigned I = 0; I < EltNum; ++I) 2266276479Sdim Rep = Builder.CreateInsertElement(Rep, Load, 2267276479Sdim ConstantInt::get(I32Ty, I)); 2268309124Sdim } else if (IsX86 && (Name.startswith("sse41.pmovsx") || 2269309124Sdim Name.startswith("sse41.pmovzx") || 2270309124Sdim Name.startswith("avx2.pmovsx") || 2271314564Sdim Name.startswith("avx2.pmovzx") || 2272314564Sdim Name.startswith("avx512.mask.pmovsx") || 2273314564Sdim Name.startswith("avx512.mask.pmovzx"))) { 2274296417Sdim VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType()); 2275296417Sdim VectorType *DstTy = cast<VectorType>(CI->getType()); 2276296417Sdim unsigned NumDstElts = DstTy->getNumElements(); 2277296417Sdim 2278309124Sdim // Extract a subvector of the first NumDstElts lanes and sign/zero extend. 2279309124Sdim SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); 2280309124Sdim for (unsigned i = 0; i != NumDstElts; ++i) 2281309124Sdim ShuffleMask[i] = i; 2282296417Sdim 2283296417Sdim Value *SV = Builder.CreateShuffleVector( 2284296417Sdim CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask); 2285309124Sdim 2286309124Sdim bool DoSext = (StringRef::npos != Name.find("pmovsx")); 2287309124Sdim Rep = DoSext ? Builder.CreateSExt(SV, DstTy) 2288309124Sdim : Builder.CreateZExt(SV, DstTy); 2289314564Sdim // If there are 3 arguments, it's a masked intrinsic so we need a select. 
2290314564Sdim if (CI->getNumArgOperands() == 3) 2291314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2292314564Sdim CI->getArgOperand(1)); 2293353358Sdim } else if (Name == "avx512.mask.pmov.qd.256" || 2294353358Sdim Name == "avx512.mask.pmov.qd.512" || 2295353358Sdim Name == "avx512.mask.pmov.wb.256" || 2296353358Sdim Name == "avx512.mask.pmov.wb.512") { 2297353358Sdim Type *Ty = CI->getArgOperand(1)->getType(); 2298353358Sdim Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty); 2299353358Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2300353358Sdim CI->getArgOperand(1)); 2301314564Sdim } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") || 2302314564Sdim Name == "avx2.vbroadcasti128")) { 2303314564Sdim // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. 2304314564Sdim Type *EltTy = CI->getType()->getVectorElementType(); 2305314564Sdim unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); 2306314564Sdim Type *VT = VectorType::get(EltTy, NumSrcElts); 2307288943Sdim Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 2308288943Sdim PointerType::getUnqual(VT)); 2309353358Sdim Value *Load = Builder.CreateAlignedLoad(VT, Op, 1); 2310314564Sdim if (NumSrcElts == 2) 2311314564Sdim Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 2312314564Sdim { 0, 1, 0, 1 }); 2313314564Sdim else 2314314564Sdim Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 2315314564Sdim { 0, 1, 2, 3, 0, 1, 2, 3 }); 2316327952Sdim } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") || 2317327952Sdim Name.startswith("avx512.mask.shuf.f"))) { 2318327952Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2319327952Sdim Type *VT = CI->getType(); 2320327952Sdim unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; 2321327952Sdim unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); 2322327952Sdim unsigned ControlBitsMask = NumLanes - 1; 
2323327952Sdim unsigned NumControlBits = NumLanes / 2; 2324327952Sdim SmallVector<uint32_t, 8> ShuffleMask(0); 2325327952Sdim 2326327952Sdim for (unsigned l = 0; l != NumLanes; ++l) { 2327327952Sdim unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; 2328327952Sdim // We actually need the other source. 2329327952Sdim if (l >= NumLanes / 2) 2330327952Sdim LaneMask += NumLanes; 2331327952Sdim for (unsigned i = 0; i != NumElementsInLane; ++i) 2332327952Sdim ShuffleMask.push_back(LaneMask * NumElementsInLane + i); 2333327952Sdim } 2334327952Sdim Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), 2335327952Sdim CI->getArgOperand(1), ShuffleMask); 2336327952Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 2337327952Sdim CI->getArgOperand(3)); 2338327952Sdim }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") || 2339327952Sdim Name.startswith("avx512.mask.broadcasti"))) { 2340327952Sdim unsigned NumSrcElts = 2341327952Sdim CI->getArgOperand(0)->getType()->getVectorNumElements(); 2342327952Sdim unsigned NumDstElts = CI->getType()->getVectorNumElements(); 2343327952Sdim 2344327952Sdim SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); 2345327952Sdim for (unsigned i = 0; i != NumDstElts; ++i) 2346327952Sdim ShuffleMask[i] = i % NumSrcElts; 2347327952Sdim 2348327952Sdim Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), 2349327952Sdim CI->getArgOperand(0), 2350327952Sdim ShuffleMask); 2351327952Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2352327952Sdim CI->getArgOperand(1)); 2353309124Sdim } else if (IsX86 && (Name.startswith("avx2.pbroadcast") || 2354309124Sdim Name.startswith("avx2.vbroadcast") || 2355309124Sdim Name.startswith("avx512.pbroadcast") || 2356309124Sdim Name.startswith("avx512.mask.broadcast.s"))) { 2357296417Sdim // Replace vp?broadcasts with a vector shuffle. 
2358296417Sdim Value *Op = CI->getArgOperand(0); 2359296417Sdim unsigned NumElts = CI->getType()->getVectorNumElements(); 2360296417Sdim Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); 2361296417Sdim Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), 2362296417Sdim Constant::getNullValue(MaskTy)); 2363309124Sdim 2364309124Sdim if (CI->getNumArgOperands() == 3) 2365309124Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2366309124Sdim CI->getArgOperand(1)); 2367344779Sdim } else if (IsX86 && (Name.startswith("sse2.padds.") || 2368344779Sdim Name.startswith("sse2.psubs.") || 2369344779Sdim Name.startswith("avx2.padds.") || 2370344779Sdim Name.startswith("avx2.psubs.") || 2371344779Sdim Name.startswith("avx512.padds.") || 2372344779Sdim Name.startswith("avx512.psubs.") || 2373344779Sdim Name.startswith("avx512.mask.padds.") || 2374344779Sdim Name.startswith("avx512.mask.psubs."))) { 2375344779Sdim bool IsAdd = Name.contains(".padds"); 2376344779Sdim Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd); 2377344779Sdim } else if (IsX86 && (Name.startswith("sse2.paddus.") || 2378344779Sdim Name.startswith("sse2.psubus.") || 2379344779Sdim Name.startswith("avx2.paddus.") || 2380344779Sdim Name.startswith("avx2.psubus.") || 2381344779Sdim Name.startswith("avx512.mask.paddus.") || 2382344779Sdim Name.startswith("avx512.mask.psubus."))) { 2383344779Sdim bool IsAdd = Name.contains(".paddus"); 2384344779Sdim Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd); 2385309124Sdim } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { 2386314564Sdim Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), 2387314564Sdim CI->getArgOperand(1), 2388314564Sdim CI->getArgOperand(2), 2389314564Sdim CI->getArgOperand(3), 2390314564Sdim CI->getArgOperand(4), 2391314564Sdim false); 2392314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.valign.")) { 2393314564Sdim Rep = UpgradeX86ALIGNIntrinsics(Builder, 
CI->getArgOperand(0), 2394314564Sdim CI->getArgOperand(1), 2395314564Sdim CI->getArgOperand(2), 2396314564Sdim CI->getArgOperand(3), 2397314564Sdim CI->getArgOperand(4), 2398314564Sdim true); 2399309124Sdim } else if (IsX86 && (Name == "sse2.psll.dq" || 2400309124Sdim Name == "avx2.psll.dq")) { 2401309124Sdim // 128/256-bit shift left specified in bits. 2402288943Sdim unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2403309124Sdim Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), 2404288943Sdim Shift / 8); // Shift is in bits. 2405309124Sdim } else if (IsX86 && (Name == "sse2.psrl.dq" || 2406309124Sdim Name == "avx2.psrl.dq")) { 2407309124Sdim // 128/256-bit shift right specified in bits. 2408288943Sdim unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2409309124Sdim Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), 2410288943Sdim Shift / 8); // Shift is in bits. 2411309124Sdim } else if (IsX86 && (Name == "sse2.psll.dq.bs" || 2412309124Sdim Name == "avx2.psll.dq.bs" || 2413309124Sdim Name == "avx512.psll.dq.512")) { 2414309124Sdim // 128/256/512-bit shift left specified in bytes. 2415288943Sdim unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2416309124Sdim Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); 2417309124Sdim } else if (IsX86 && (Name == "sse2.psrl.dq.bs" || 2418309124Sdim Name == "avx2.psrl.dq.bs" || 2419309124Sdim Name == "avx512.psrl.dq.512")) { 2420309124Sdim // 128/256/512-bit shift right specified in bytes. 
2421288943Sdim unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2422309124Sdim Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); 2423309124Sdim } else if (IsX86 && (Name == "sse41.pblendw" || 2424309124Sdim Name.startswith("sse41.blendp") || 2425309124Sdim Name.startswith("avx.blend.p") || 2426309124Sdim Name == "avx2.pblendw" || 2427309124Sdim Name.startswith("avx2.pblendd."))) { 2428288943Sdim Value *Op0 = CI->getArgOperand(0); 2429288943Sdim Value *Op1 = CI->getArgOperand(1); 2430288943Sdim unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2431288943Sdim VectorType *VecTy = cast<VectorType>(CI->getType()); 2432288943Sdim unsigned NumElts = VecTy->getNumElements(); 2433288943Sdim 2434309124Sdim SmallVector<uint32_t, 16> Idxs(NumElts); 2435309124Sdim for (unsigned i = 0; i != NumElts; ++i) 2436309124Sdim Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i; 2437288943Sdim 2438309124Sdim Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2439309124Sdim } else if (IsX86 && (Name.startswith("avx.vinsertf128.") || 2440314564Sdim Name == "avx2.vinserti128" || 2441314564Sdim Name.startswith("avx512.mask.insert"))) { 2442288943Sdim Value *Op0 = CI->getArgOperand(0); 2443288943Sdim Value *Op1 = CI->getArgOperand(1); 2444288943Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2445314564Sdim unsigned DstNumElts = CI->getType()->getVectorNumElements(); 2446314564Sdim unsigned SrcNumElts = Op1->getType()->getVectorNumElements(); 2447314564Sdim unsigned Scale = DstNumElts / SrcNumElts; 2448296417Sdim 2449288943Sdim // Mask off the high bits of the immediate value; hardware ignores those. 2450314564Sdim Imm = Imm % Scale; 2451296417Sdim 2452314564Sdim // Extend the second operand into a vector the size of the destination. 
2453288943Sdim Value *UndefV = UndefValue::get(Op1->getType()); 2454314564Sdim SmallVector<uint32_t, 8> Idxs(DstNumElts); 2455314564Sdim for (unsigned i = 0; i != SrcNumElts; ++i) 2456309124Sdim Idxs[i] = i; 2457314564Sdim for (unsigned i = SrcNumElts; i != DstNumElts; ++i) 2458314564Sdim Idxs[i] = SrcNumElts; 2459309124Sdim Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); 2460288943Sdim 2461288943Sdim // Insert the second operand into the first operand. 2462288943Sdim 2463288943Sdim // Note that there is no guarantee that instruction lowering will actually 2464288943Sdim // produce a vinsertf128 instruction for the created shuffles. In 2465288943Sdim // particular, the 0 immediate case involves no lane changes, so it can 2466288943Sdim // be handled as a blend. 2467288943Sdim 2468288943Sdim // Example of shuffle mask for 32-bit elements: 2469288943Sdim // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 2470288943Sdim // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > 2471288943Sdim 2472314564Sdim // First fill with identify mask. 2473314564Sdim for (unsigned i = 0; i != DstNumElts; ++i) 2474314564Sdim Idxs[i] = i; 2475314564Sdim // Then replace the elements where we need to insert. 2476314564Sdim for (unsigned i = 0; i != SrcNumElts; ++i) 2477314564Sdim Idxs[i + Imm * SrcNumElts] = i + DstNumElts; 2478309124Sdim Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); 2479314564Sdim 2480314564Sdim // If the intrinsic has a mask operand, handle that. 
2481314564Sdim if (CI->getNumArgOperands() == 5) 2482314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 2483314564Sdim CI->getArgOperand(3)); 2484309124Sdim } else if (IsX86 && (Name.startswith("avx.vextractf128.") || 2485314564Sdim Name == "avx2.vextracti128" || 2486314564Sdim Name.startswith("avx512.mask.vextract"))) { 2487288943Sdim Value *Op0 = CI->getArgOperand(0); 2488288943Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2489314564Sdim unsigned DstNumElts = CI->getType()->getVectorNumElements(); 2490314564Sdim unsigned SrcNumElts = Op0->getType()->getVectorNumElements(); 2491314564Sdim unsigned Scale = SrcNumElts / DstNumElts; 2492296417Sdim 2493288943Sdim // Mask off the high bits of the immediate value; hardware ignores those. 2494314564Sdim Imm = Imm % Scale; 2495288943Sdim 2496314564Sdim // Get indexes for the subvector of the input vector. 2497314564Sdim SmallVector<uint32_t, 8> Idxs(DstNumElts); 2498314564Sdim for (unsigned i = 0; i != DstNumElts; ++i) { 2499314564Sdim Idxs[i] = i + (Imm * DstNumElts); 2500288943Sdim } 2501314564Sdim Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2502288943Sdim 2503314564Sdim // If the intrinsic has a mask operand, handle that. 
2504314564Sdim if (CI->getNumArgOperands() == 4) 2505314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2506314564Sdim CI->getArgOperand(2)); 2507309124Sdim } else if (!IsX86 && Name == "stackprotectorcheck") { 2508309124Sdim Rep = nullptr; 2509309124Sdim } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") || 2510309124Sdim Name.startswith("avx512.mask.perm.di."))) { 2511309124Sdim Value *Op0 = CI->getArgOperand(0); 2512309124Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2513309124Sdim VectorType *VecTy = cast<VectorType>(CI->getType()); 2514309124Sdim unsigned NumElts = VecTy->getNumElements(); 2515249259Sdim 2516309124Sdim SmallVector<uint32_t, 8> Idxs(NumElts); 2517309124Sdim for (unsigned i = 0; i != NumElts; ++i) 2518309124Sdim Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); 2519249259Sdim 2520309124Sdim Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2521249259Sdim 2522309124Sdim if (CI->getNumArgOperands() == 4) 2523309124Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2524309124Sdim CI->getArgOperand(2)); 2525327952Sdim } else if (IsX86 && (Name.startswith("avx.vperm2f128.") || 2526327952Sdim Name == "avx2.vperm2i128")) { 2527327952Sdim // The immediate permute control byte looks like this: 2528327952Sdim // [1:0] - select 128 bits from sources for low half of destination 2529327952Sdim // [2] - ignore 2530327952Sdim // [3] - zero low half of destination 2531327952Sdim // [5:4] - select 128 bits from sources for high half of destination 2532327952Sdim // [6] - ignore 2533327952Sdim // [7] - zero high half of destination 2534327952Sdim 2535327952Sdim uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2536327952Sdim 2537327952Sdim unsigned NumElts = CI->getType()->getVectorNumElements(); 2538327952Sdim unsigned HalfSize = NumElts / 2; 2539327952Sdim SmallVector<uint32_t, 8> ShuffleMask(NumElts); 2540327952Sdim 2541327952Sdim // Determine which operand(s) are 
actually in use for this instruction. 2542327952Sdim Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0); 2543327952Sdim Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0); 2544327952Sdim 2545327952Sdim // If needed, replace operands based on zero mask. 2546327952Sdim V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0; 2547327952Sdim V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1; 2548327952Sdim 2549327952Sdim // Permute low half of result. 2550327952Sdim unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0; 2551327952Sdim for (unsigned i = 0; i < HalfSize; ++i) 2552327952Sdim ShuffleMask[i] = StartIndex + i; 2553327952Sdim 2554327952Sdim // Permute high half of result. 2555327952Sdim StartIndex = (Imm & 0x10) ? HalfSize : 0; 2556327952Sdim for (unsigned i = 0; i < HalfSize; ++i) 2557327952Sdim ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; 2558327952Sdim 2559327952Sdim Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask); 2560327952Sdim 2561309124Sdim } else if (IsX86 && (Name.startswith("avx.vpermil.") || 2562309124Sdim Name == "sse2.pshuf.d" || 2563309124Sdim Name.startswith("avx512.mask.vpermil.p") || 2564309124Sdim Name.startswith("avx512.mask.pshuf.d."))) { 2565309124Sdim Value *Op0 = CI->getArgOperand(0); 2566309124Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2567309124Sdim VectorType *VecTy = cast<VectorType>(CI->getType()); 2568309124Sdim unsigned NumElts = VecTy->getNumElements(); 2569309124Sdim // Calculate the size of each index in the immediate. 2570309124Sdim unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); 2571309124Sdim unsigned IdxMask = ((1 << IdxSize) - 1); 2572309124Sdim 2573309124Sdim SmallVector<uint32_t, 8> Idxs(NumElts); 2574309124Sdim // Lookup the bits for this element, wrapping around the immediate every 2575309124Sdim // 8-bits. 
Elements are grouped into sets of 2 or 4 elements so we need 2576309124Sdim // to offset by the first index of each group. 2577309124Sdim for (unsigned i = 0; i != NumElts; ++i) 2578309124Sdim Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); 2579309124Sdim 2580309124Sdim Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2581309124Sdim 2582309124Sdim if (CI->getNumArgOperands() == 4) 2583309124Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2584309124Sdim CI->getArgOperand(2)); 2585309124Sdim } else if (IsX86 && (Name == "sse2.pshufl.w" || 2586309124Sdim Name.startswith("avx512.mask.pshufl.w."))) { 2587309124Sdim Value *Op0 = CI->getArgOperand(0); 2588309124Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2589309124Sdim unsigned NumElts = CI->getType()->getVectorNumElements(); 2590309124Sdim 2591309124Sdim SmallVector<uint32_t, 16> Idxs(NumElts); 2592309124Sdim for (unsigned l = 0; l != NumElts; l += 8) { 2593309124Sdim for (unsigned i = 0; i != 4; ++i) 2594309124Sdim Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; 2595309124Sdim for (unsigned i = 4; i != 8; ++i) 2596309124Sdim Idxs[i + l] = i + l; 2597249259Sdim } 2598309124Sdim 2599309124Sdim Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2600309124Sdim 2601309124Sdim if (CI->getNumArgOperands() == 4) 2602309124Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2603309124Sdim CI->getArgOperand(2)); 2604309124Sdim } else if (IsX86 && (Name == "sse2.pshufh.w" || 2605309124Sdim Name.startswith("avx512.mask.pshufh.w."))) { 2606309124Sdim Value *Op0 = CI->getArgOperand(0); 2607309124Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2608309124Sdim unsigned NumElts = CI->getType()->getVectorNumElements(); 2609309124Sdim 2610309124Sdim SmallVector<uint32_t, 16> Idxs(NumElts); 2611309124Sdim for (unsigned l = 0; l != NumElts; l += 8) { 2612309124Sdim for (unsigned i = 0; i != 4; ++i) 2613309124Sdim Idxs[i + l] = i + l; 
2614309124Sdim for (unsigned i = 0; i != 4; ++i) 2615309124Sdim Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; 2616309124Sdim } 2617309124Sdim 2618309124Sdim Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2619309124Sdim 2620309124Sdim if (CI->getNumArgOperands() == 4) 2621309124Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2622309124Sdim CI->getArgOperand(2)); 2623314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) { 2624314564Sdim Value *Op0 = CI->getArgOperand(0); 2625314564Sdim Value *Op1 = CI->getArgOperand(1); 2626314564Sdim unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2627314564Sdim unsigned NumElts = CI->getType()->getVectorNumElements(); 2628314564Sdim 2629314564Sdim unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 2630314564Sdim unsigned HalfLaneElts = NumLaneElts / 2; 2631314564Sdim 2632314564Sdim SmallVector<uint32_t, 16> Idxs(NumElts); 2633314564Sdim for (unsigned i = 0; i != NumElts; ++i) { 2634314564Sdim // Base index is the starting element of the lane. 2635314564Sdim Idxs[i] = i - (i % NumLaneElts); 2636314564Sdim // If we are half way through the lane switch to the other source. 2637314564Sdim if ((i % NumLaneElts) >= HalfLaneElts) 2638314564Sdim Idxs[i] += NumElts; 2639314564Sdim // Now select the specific element. By adding HalfLaneElts bits from 2640314564Sdim // the immediate. Wrapping around the immediate every 8-bits. 
2641314564Sdim Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); 2642314564Sdim } 2643314564Sdim 2644314564Sdim Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2645314564Sdim 2646314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 2647314564Sdim CI->getArgOperand(3)); 2648309124Sdim } else if (IsX86 && (Name.startswith("avx512.mask.movddup") || 2649309124Sdim Name.startswith("avx512.mask.movshdup") || 2650309124Sdim Name.startswith("avx512.mask.movsldup"))) { 2651309124Sdim Value *Op0 = CI->getArgOperand(0); 2652309124Sdim unsigned NumElts = CI->getType()->getVectorNumElements(); 2653309124Sdim unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 2654309124Sdim 2655309124Sdim unsigned Offset = 0; 2656309124Sdim if (Name.startswith("avx512.mask.movshdup.")) 2657309124Sdim Offset = 1; 2658309124Sdim 2659309124Sdim SmallVector<uint32_t, 16> Idxs(NumElts); 2660309124Sdim for (unsigned l = 0; l != NumElts; l += NumLaneElts) 2661309124Sdim for (unsigned i = 0; i != NumLaneElts; i += 2) { 2662309124Sdim Idxs[i + l + 0] = i + l + Offset; 2663309124Sdim Idxs[i + l + 1] = i + l + Offset; 2664309124Sdim } 2665309124Sdim 2666309124Sdim Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2667309124Sdim 2668309124Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2669309124Sdim CI->getArgOperand(1)); 2670309124Sdim } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") || 2671309124Sdim Name.startswith("avx512.mask.unpckl."))) { 2672309124Sdim Value *Op0 = CI->getArgOperand(0); 2673309124Sdim Value *Op1 = CI->getArgOperand(1); 2674309124Sdim int NumElts = CI->getType()->getVectorNumElements(); 2675309124Sdim int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 2676309124Sdim 2677309124Sdim SmallVector<uint32_t, 64> Idxs(NumElts); 2678309124Sdim for (int l = 0; l != NumElts; l += NumLaneElts) 2679309124Sdim for (int i = 0; i != NumLaneElts; ++i) 2680309124Sdim Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); 
2681309124Sdim 2682309124Sdim Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2683309124Sdim 2684309124Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2685309124Sdim CI->getArgOperand(2)); 2686309124Sdim } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") || 2687309124Sdim Name.startswith("avx512.mask.unpckh."))) { 2688309124Sdim Value *Op0 = CI->getArgOperand(0); 2689309124Sdim Value *Op1 = CI->getArgOperand(1); 2690309124Sdim int NumElts = CI->getType()->getVectorNumElements(); 2691309124Sdim int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 2692309124Sdim 2693309124Sdim SmallVector<uint32_t, 64> Idxs(NumElts); 2694309124Sdim for (int l = 0; l != NumElts; l += NumLaneElts) 2695309124Sdim for (int i = 0; i != NumLaneElts; ++i) 2696309124Sdim Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); 2697309124Sdim 2698309124Sdim Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2699309124Sdim 2700309124Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2701309124Sdim CI->getArgOperand(2)); 2702344779Sdim } else if (IsX86 && (Name.startswith("avx512.mask.and.") || 2703344779Sdim Name.startswith("avx512.mask.pand."))) { 2704314564Sdim VectorType *FTy = cast<VectorType>(CI->getType()); 2705314564Sdim VectorType *ITy = VectorType::getInteger(FTy); 2706314564Sdim Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2707314564Sdim Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2708314564Sdim Rep = Builder.CreateBitCast(Rep, FTy); 2709314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2710314564Sdim CI->getArgOperand(2)); 2711344779Sdim } else if (IsX86 && (Name.startswith("avx512.mask.andn.") || 2712344779Sdim Name.startswith("avx512.mask.pandn."))) { 2713314564Sdim VectorType *FTy = cast<VectorType>(CI->getType()); 2714314564Sdim VectorType *ITy = VectorType::getInteger(FTy); 2715314564Sdim Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); 
2716314564Sdim Rep = Builder.CreateAnd(Rep, 2717314564Sdim Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2718314564Sdim Rep = Builder.CreateBitCast(Rep, FTy); 2719314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2720314564Sdim CI->getArgOperand(2)); 2721344779Sdim } else if (IsX86 && (Name.startswith("avx512.mask.or.") || 2722344779Sdim Name.startswith("avx512.mask.por."))) { 2723314564Sdim VectorType *FTy = cast<VectorType>(CI->getType()); 2724314564Sdim VectorType *ITy = VectorType::getInteger(FTy); 2725314564Sdim Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2726314564Sdim Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2727314564Sdim Rep = Builder.CreateBitCast(Rep, FTy); 2728314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2729314564Sdim CI->getArgOperand(2)); 2730344779Sdim } else if (IsX86 && (Name.startswith("avx512.mask.xor.") || 2731344779Sdim Name.startswith("avx512.mask.pxor."))) { 2732314564Sdim VectorType *FTy = cast<VectorType>(CI->getType()); 2733314564Sdim VectorType *ITy = VectorType::getInteger(FTy); 2734314564Sdim Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2735314564Sdim Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2736314564Sdim Rep = Builder.CreateBitCast(Rep, FTy); 2737314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2738314564Sdim CI->getArgOperand(2)); 2739314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.padd.")) { 2740314564Sdim Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 2741314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2742314564Sdim CI->getArgOperand(2)); 2743314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.psub.")) { 2744314564Sdim Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1)); 2745314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2746314564Sdim CI->getArgOperand(2)); 2747314564Sdim } else if (IsX86 && 
Name.startswith("avx512.mask.pmull.")) { 2748314564Sdim Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); 2749314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2750314564Sdim CI->getArgOperand(2)); 2751341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.add.p")) { 2752341825Sdim if (Name.endswith(".512")) { 2753341825Sdim Intrinsic::ID IID; 2754341825Sdim if (Name[17] == 's') 2755341825Sdim IID = Intrinsic::x86_avx512_add_ps_512; 2756341825Sdim else 2757341825Sdim IID = Intrinsic::x86_avx512_add_pd_512; 2758341825Sdim 2759341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2760341825Sdim { CI->getArgOperand(0), CI->getArgOperand(1), 2761341825Sdim CI->getArgOperand(4) }); 2762341825Sdim } else { 2763341825Sdim Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 2764341825Sdim } 2765314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2766314564Sdim CI->getArgOperand(2)); 2767314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.div.p")) { 2768341825Sdim if (Name.endswith(".512")) { 2769341825Sdim Intrinsic::ID IID; 2770341825Sdim if (Name[17] == 's') 2771341825Sdim IID = Intrinsic::x86_avx512_div_ps_512; 2772341825Sdim else 2773341825Sdim IID = Intrinsic::x86_avx512_div_pd_512; 2774341825Sdim 2775341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2776341825Sdim { CI->getArgOperand(0), CI->getArgOperand(1), 2777341825Sdim CI->getArgOperand(4) }); 2778341825Sdim } else { 2779341825Sdim Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); 2780341825Sdim } 2781314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2782314564Sdim CI->getArgOperand(2)); 2783314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) { 2784341825Sdim if (Name.endswith(".512")) { 2785341825Sdim Intrinsic::ID IID; 2786341825Sdim if (Name[17] == 's') 2787341825Sdim IID = Intrinsic::x86_avx512_mul_ps_512; 
2788341825Sdim else 2789341825Sdim IID = Intrinsic::x86_avx512_mul_pd_512; 2790341825Sdim 2791341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2792341825Sdim { CI->getArgOperand(0), CI->getArgOperand(1), 2793341825Sdim CI->getArgOperand(4) }); 2794341825Sdim } else { 2795341825Sdim Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); 2796341825Sdim } 2797314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2798314564Sdim CI->getArgOperand(2)); 2799314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) { 2800341825Sdim if (Name.endswith(".512")) { 2801341825Sdim Intrinsic::ID IID; 2802341825Sdim if (Name[17] == 's') 2803341825Sdim IID = Intrinsic::x86_avx512_sub_ps_512; 2804341825Sdim else 2805341825Sdim IID = Intrinsic::x86_avx512_sub_pd_512; 2806321369Sdim 2807341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2808341825Sdim { CI->getArgOperand(0), CI->getArgOperand(1), 2809341825Sdim CI->getArgOperand(4) }); 2810341825Sdim } else { 2811341825Sdim Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); 2812341825Sdim } 2813321369Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2814321369Sdim CI->getArgOperand(2)); 2815344779Sdim } else if (IsX86 && (Name.startswith("avx512.mask.max.p") || 2816344779Sdim Name.startswith("avx512.mask.min.p")) && 2817341825Sdim Name.drop_front(18) == ".512") { 2818344779Sdim bool IsDouble = Name[17] == 'd'; 2819344779Sdim bool IsMin = Name[13] == 'i'; 2820344779Sdim static const Intrinsic::ID MinMaxTbl[2][2] = { 2821344779Sdim { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 }, 2822344779Sdim { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 } 2823344779Sdim }; 2824344779Sdim Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble]; 2825314564Sdim 2826314564Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2827341825Sdim { 
CI->getArgOperand(0), CI->getArgOperand(1), 2828341825Sdim CI->getArgOperand(4) }); 2829314564Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2830314564Sdim CI->getArgOperand(2)); 2831341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { 2832341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 2833341825Sdim Intrinsic::ctlz, 2834341825Sdim CI->getType()), 2835341825Sdim { CI->getArgOperand(0), Builder.getInt1(false) }); 2836341825Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2837341825Sdim CI->getArgOperand(1)); 2838314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.psll")) { 2839314564Sdim bool IsImmediate = Name[16] == 'i' || 2840314564Sdim (Name.size() > 18 && Name[18] == 'i'); 2841314564Sdim bool IsVariable = Name[16] == 'v'; 2842314564Sdim char Size = Name[16] == '.' ? Name[17] : 2843314564Sdim Name[17] == '.' ? Name[18] : 2844314564Sdim Name[18] == '.' ? Name[19] : 2845314564Sdim Name[20]; 2846314564Sdim 2847314564Sdim Intrinsic::ID IID; 2848314564Sdim if (IsVariable && Name[17] != '.') { 2849314564Sdim if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di 2850314564Sdim IID = Intrinsic::x86_avx2_psllv_q; 2851314564Sdim else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di 2852314564Sdim IID = Intrinsic::x86_avx2_psllv_q_256; 2853314564Sdim else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si 2854314564Sdim IID = Intrinsic::x86_avx2_psllv_d; 2855314564Sdim else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si 2856314564Sdim IID = Intrinsic::x86_avx2_psllv_d_256; 2857314564Sdim else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi 2858314564Sdim IID = Intrinsic::x86_avx512_psllv_w_128; 2859314564Sdim else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi 2860314564Sdim IID = Intrinsic::x86_avx512_psllv_w_256; 2861314564Sdim else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi 2862314564Sdim IID = 
Intrinsic::x86_avx512_psllv_w_512; 2863314564Sdim else 2864314564Sdim llvm_unreachable("Unexpected size"); 2865314564Sdim } else if (Name.endswith(".128")) { 2866314564Sdim if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 2867314564Sdim IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d 2868314564Sdim : Intrinsic::x86_sse2_psll_d; 2869314564Sdim else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 2870314564Sdim IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q 2871314564Sdim : Intrinsic::x86_sse2_psll_q; 2872314564Sdim else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 2873314564Sdim IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w 2874314564Sdim : Intrinsic::x86_sse2_psll_w; 2875314564Sdim else 2876314564Sdim llvm_unreachable("Unexpected size"); 2877314564Sdim } else if (Name.endswith(".256")) { 2878314564Sdim if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 2879314564Sdim IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d 2880314564Sdim : Intrinsic::x86_avx2_psll_d; 2881314564Sdim else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 2882314564Sdim IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q 2883314564Sdim : Intrinsic::x86_avx2_psll_q; 2884314564Sdim else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 2885314564Sdim IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w 2886314564Sdim : Intrinsic::x86_avx2_psll_w; 2887314564Sdim else 2888314564Sdim llvm_unreachable("Unexpected size"); 2889314564Sdim } else { 2890314564Sdim if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 2891314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 : 2892314564Sdim IsVariable ? Intrinsic::x86_avx512_psllv_d_512 : 2893314564Sdim Intrinsic::x86_avx512_psll_d_512; 2894314564Sdim else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 2895314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 : 2896314564Sdim IsVariable ? 
Intrinsic::x86_avx512_psllv_q_512 : 2897314564Sdim Intrinsic::x86_avx512_psll_q_512; 2898314564Sdim else if (Size == 'w') // psll.wi.512, pslli.w, psll.w 2899314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512 2900314564Sdim : Intrinsic::x86_avx512_psll_w_512; 2901314564Sdim else 2902314564Sdim llvm_unreachable("Unexpected size"); 2903314564Sdim } 2904314564Sdim 2905314564Sdim Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 2906314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.psrl")) { 2907314564Sdim bool IsImmediate = Name[16] == 'i' || 2908314564Sdim (Name.size() > 18 && Name[18] == 'i'); 2909314564Sdim bool IsVariable = Name[16] == 'v'; 2910314564Sdim char Size = Name[16] == '.' ? Name[17] : 2911314564Sdim Name[17] == '.' ? Name[18] : 2912314564Sdim Name[18] == '.' ? Name[19] : 2913314564Sdim Name[20]; 2914314564Sdim 2915314564Sdim Intrinsic::ID IID; 2916314564Sdim if (IsVariable && Name[17] != '.') { 2917314564Sdim if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di 2918314564Sdim IID = Intrinsic::x86_avx2_psrlv_q; 2919314564Sdim else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di 2920314564Sdim IID = Intrinsic::x86_avx2_psrlv_q_256; 2921314564Sdim else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si 2922314564Sdim IID = Intrinsic::x86_avx2_psrlv_d; 2923314564Sdim else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si 2924314564Sdim IID = Intrinsic::x86_avx2_psrlv_d_256; 2925314564Sdim else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi 2926314564Sdim IID = Intrinsic::x86_avx512_psrlv_w_128; 2927314564Sdim else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi 2928314564Sdim IID = Intrinsic::x86_avx512_psrlv_w_256; 2929314564Sdim else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi 2930314564Sdim IID = Intrinsic::x86_avx512_psrlv_w_512; 2931314564Sdim else 2932314564Sdim llvm_unreachable("Unexpected size"); 2933314564Sdim } else if 
(Name.endswith(".128")) { 2934314564Sdim if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 2935314564Sdim IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d 2936314564Sdim : Intrinsic::x86_sse2_psrl_d; 2937314564Sdim else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 2938314564Sdim IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q 2939314564Sdim : Intrinsic::x86_sse2_psrl_q; 2940314564Sdim else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 2941314564Sdim IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w 2942314564Sdim : Intrinsic::x86_sse2_psrl_w; 2943314564Sdim else 2944314564Sdim llvm_unreachable("Unexpected size"); 2945314564Sdim } else if (Name.endswith(".256")) { 2946314564Sdim if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 2947314564Sdim IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d 2948314564Sdim : Intrinsic::x86_avx2_psrl_d; 2949314564Sdim else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 2950314564Sdim IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q 2951314564Sdim : Intrinsic::x86_avx2_psrl_q; 2952314564Sdim else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 2953314564Sdim IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w 2954314564Sdim : Intrinsic::x86_avx2_psrl_w; 2955314564Sdim else 2956314564Sdim llvm_unreachable("Unexpected size"); 2957314564Sdim } else { 2958314564Sdim if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512 2959314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 : 2960314564Sdim IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 : 2961314564Sdim Intrinsic::x86_avx512_psrl_d_512; 2962314564Sdim else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512 2963314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 : 2964314564Sdim IsVariable ? 
Intrinsic::x86_avx512_psrlv_q_512 : 2965314564Sdim Intrinsic::x86_avx512_psrl_q_512; 2966314564Sdim else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w) 2967314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512 2968314564Sdim : Intrinsic::x86_avx512_psrl_w_512; 2969314564Sdim else 2970314564Sdim llvm_unreachable("Unexpected size"); 2971314564Sdim } 2972314564Sdim 2973314564Sdim Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 2974314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.psra")) { 2975314564Sdim bool IsImmediate = Name[16] == 'i' || 2976314564Sdim (Name.size() > 18 && Name[18] == 'i'); 2977314564Sdim bool IsVariable = Name[16] == 'v'; 2978314564Sdim char Size = Name[16] == '.' ? Name[17] : 2979314564Sdim Name[17] == '.' ? Name[18] : 2980314564Sdim Name[18] == '.' ? Name[19] : 2981314564Sdim Name[20]; 2982314564Sdim 2983314564Sdim Intrinsic::ID IID; 2984314564Sdim if (IsVariable && Name[17] != '.') { 2985314564Sdim if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si 2986314564Sdim IID = Intrinsic::x86_avx2_psrav_d; 2987314564Sdim else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si 2988314564Sdim IID = Intrinsic::x86_avx2_psrav_d_256; 2989314564Sdim else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi 2990314564Sdim IID = Intrinsic::x86_avx512_psrav_w_128; 2991314564Sdim else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi 2992314564Sdim IID = Intrinsic::x86_avx512_psrav_w_256; 2993314564Sdim else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi 2994314564Sdim IID = Intrinsic::x86_avx512_psrav_w_512; 2995314564Sdim else 2996314564Sdim llvm_unreachable("Unexpected size"); 2997314564Sdim } else if (Name.endswith(".128")) { 2998314564Sdim if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128 2999314564Sdim IID = IsImmediate ? 
Intrinsic::x86_sse2_psrai_d 3000314564Sdim : Intrinsic::x86_sse2_psra_d; 3001314564Sdim else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128 3002314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 : 3003314564Sdim IsVariable ? Intrinsic::x86_avx512_psrav_q_128 : 3004314564Sdim Intrinsic::x86_avx512_psra_q_128; 3005314564Sdim else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128 3006314564Sdim IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w 3007314564Sdim : Intrinsic::x86_sse2_psra_w; 3008314564Sdim else 3009314564Sdim llvm_unreachable("Unexpected size"); 3010314564Sdim } else if (Name.endswith(".256")) { 3011314564Sdim if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256 3012314564Sdim IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d 3013314564Sdim : Intrinsic::x86_avx2_psra_d; 3014314564Sdim else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256 3015314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 : 3016314564Sdim IsVariable ? Intrinsic::x86_avx512_psrav_q_256 : 3017314564Sdim Intrinsic::x86_avx512_psra_q_256; 3018314564Sdim else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256 3019314564Sdim IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w 3020314564Sdim : Intrinsic::x86_avx2_psra_w; 3021314564Sdim else 3022314564Sdim llvm_unreachable("Unexpected size"); 3023314564Sdim } else { 3024314564Sdim if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512 3025314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 : 3026314564Sdim IsVariable ? Intrinsic::x86_avx512_psrav_d_512 : 3027314564Sdim Intrinsic::x86_avx512_psra_d_512; 3028314564Sdim else if (Size == 'q') // psra.qi.512, psrai.q, psra.q 3029314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 : 3030314564Sdim IsVariable ? 
Intrinsic::x86_avx512_psrav_q_512 : 3031314564Sdim Intrinsic::x86_avx512_psra_q_512; 3032314564Sdim else if (Size == 'w') // psra.wi.512, psrai.w, psra.w 3033314564Sdim IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512 3034314564Sdim : Intrinsic::x86_avx512_psra_w_512; 3035314564Sdim else 3036314564Sdim llvm_unreachable("Unexpected size"); 3037314564Sdim } 3038314564Sdim 3039314564Sdim Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 3040314564Sdim } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { 3041314564Sdim Rep = upgradeMaskedMove(Builder, *CI); 3042321369Sdim } else if (IsX86 && Name.startswith("avx512.cvtmask2")) { 3043321369Sdim Rep = UpgradeMaskToInt(Builder, *CI); 3044321369Sdim } else if (IsX86 && Name.endswith(".movntdqa")) { 3045321369Sdim Module *M = F->getParent(); 3046321369Sdim MDNode *Node = MDNode::get( 3047321369Sdim C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 3048321369Sdim 3049321369Sdim Value *Ptr = CI->getArgOperand(0); 3050321369Sdim VectorType *VTy = cast<VectorType>(CI->getType()); 3051321369Sdim 3052321369Sdim // Convert the type of the pointer to a pointer to the stored type. 3053321369Sdim Value *BC = 3054321369Sdim Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast"); 3055353358Sdim LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8); 3056321369Sdim LI->setMetadata(M->getMDKindID("nontemporal"), Node); 3057321369Sdim Rep = LI; 3058341825Sdim } else if (IsX86 && (Name.startswith("fma.vfmadd.") || 3059341825Sdim Name.startswith("fma.vfmsub.") || 3060341825Sdim Name.startswith("fma.vfnmadd.") || 3061341825Sdim Name.startswith("fma.vfnmsub."))) { 3062341825Sdim bool NegMul = Name[6] == 'n'; 3063341825Sdim bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's'; 3064341825Sdim bool IsScalar = NegMul ? 
Name[12] == 's' : Name[11] == 's'; 3065341825Sdim 3066341825Sdim Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3067341825Sdim CI->getArgOperand(2) }; 3068341825Sdim 3069341825Sdim if (IsScalar) { 3070341825Sdim Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); 3071341825Sdim Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 3072341825Sdim Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 3073341825Sdim } 3074341825Sdim 3075341825Sdim if (NegMul && !IsScalar) 3076341825Sdim Ops[0] = Builder.CreateFNeg(Ops[0]); 3077341825Sdim if (NegMul && IsScalar) 3078341825Sdim Ops[1] = Builder.CreateFNeg(Ops[1]); 3079341825Sdim if (NegAcc) 3080341825Sdim Ops[2] = Builder.CreateFNeg(Ops[2]); 3081341825Sdim 3082341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), 3083341825Sdim Intrinsic::fma, 3084341825Sdim Ops[0]->getType()), 3085341825Sdim Ops); 3086341825Sdim 3087341825Sdim if (IsScalar) 3088341825Sdim Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, 3089341825Sdim (uint64_t)0); 3090341825Sdim } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) { 3091341825Sdim Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3092341825Sdim CI->getArgOperand(2) }; 3093341825Sdim 3094341825Sdim Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); 3095341825Sdim Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 3096341825Sdim Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 3097341825Sdim 3098341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), 3099341825Sdim Intrinsic::fma, 3100341825Sdim Ops[0]->getType()), 3101341825Sdim Ops); 3102341825Sdim 3103341825Sdim Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()), 3104341825Sdim Rep, (uint64_t)0); 3105341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") || 3106341825Sdim Name.startswith("avx512.maskz.vfmadd.s") || 3107341825Sdim 
Name.startswith("avx512.mask3.vfmadd.s") || 3108341825Sdim Name.startswith("avx512.mask3.vfmsub.s") || 3109341825Sdim Name.startswith("avx512.mask3.vfnmsub.s"))) { 3110341825Sdim bool IsMask3 = Name[11] == '3'; 3111341825Sdim bool IsMaskZ = Name[11] == 'z'; 3112341825Sdim // Drop the "avx512.mask." to make it easier. 3113341825Sdim Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 3114341825Sdim bool NegMul = Name[2] == 'n'; 3115341825Sdim bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; 3116341825Sdim 3117341825Sdim Value *A = CI->getArgOperand(0); 3118341825Sdim Value *B = CI->getArgOperand(1); 3119341825Sdim Value *C = CI->getArgOperand(2); 3120341825Sdim 3121341825Sdim if (NegMul && (IsMask3 || IsMaskZ)) 3122341825Sdim A = Builder.CreateFNeg(A); 3123341825Sdim if (NegMul && !(IsMask3 || IsMaskZ)) 3124341825Sdim B = Builder.CreateFNeg(B); 3125341825Sdim if (NegAcc) 3126341825Sdim C = Builder.CreateFNeg(C); 3127341825Sdim 3128341825Sdim A = Builder.CreateExtractElement(A, (uint64_t)0); 3129341825Sdim B = Builder.CreateExtractElement(B, (uint64_t)0); 3130341825Sdim C = Builder.CreateExtractElement(C, (uint64_t)0); 3131341825Sdim 3132341825Sdim if (!isa<ConstantInt>(CI->getArgOperand(4)) || 3133341825Sdim cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) { 3134341825Sdim Value *Ops[] = { A, B, C, CI->getArgOperand(4) }; 3135341825Sdim 3136341825Sdim Intrinsic::ID IID; 3137341825Sdim if (Name.back() == 'd') 3138341825Sdim IID = Intrinsic::x86_avx512_vfmadd_f64; 3139341825Sdim else 3140341825Sdim IID = Intrinsic::x86_avx512_vfmadd_f32; 3141341825Sdim Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID); 3142341825Sdim Rep = Builder.CreateCall(FMA, Ops); 3143341825Sdim } else { 3144341825Sdim Function *FMA = Intrinsic::getDeclaration(CI->getModule(), 3145341825Sdim Intrinsic::fma, 3146341825Sdim A->getType()); 3147341825Sdim Rep = Builder.CreateCall(FMA, { A, B, C }); 3148341825Sdim } 3149341825Sdim 3150341825Sdim Value *PassThru = 
IsMaskZ ? Constant::getNullValue(Rep->getType()) : 3151341825Sdim IsMask3 ? C : A; 3152341825Sdim 3153341825Sdim // For Mask3 with NegAcc, we need to create a new extractelement that 3154341825Sdim // avoids the negation above. 3155341825Sdim if (NegAcc && IsMask3) 3156341825Sdim PassThru = Builder.CreateExtractElement(CI->getArgOperand(2), 3157341825Sdim (uint64_t)0); 3158341825Sdim 3159341825Sdim Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3), 3160341825Sdim Rep, PassThru); 3161341825Sdim Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), 3162341825Sdim Rep, (uint64_t)0); 3163341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") || 3164341825Sdim Name.startswith("avx512.mask.vfnmadd.p") || 3165341825Sdim Name.startswith("avx512.mask.vfnmsub.p") || 3166341825Sdim Name.startswith("avx512.mask3.vfmadd.p") || 3167341825Sdim Name.startswith("avx512.mask3.vfmsub.p") || 3168341825Sdim Name.startswith("avx512.mask3.vfnmsub.p") || 3169341825Sdim Name.startswith("avx512.maskz.vfmadd.p"))) { 3170341825Sdim bool IsMask3 = Name[11] == '3'; 3171341825Sdim bool IsMaskZ = Name[11] == 'z'; 3172341825Sdim // Drop the "avx512.mask." to make it easier. 3173341825Sdim Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 3174341825Sdim bool NegMul = Name[2] == 'n'; 3175341825Sdim bool NegAcc = NegMul ? 
Name[4] == 's' : Name[3] == 's'; 3176341825Sdim 3177341825Sdim Value *A = CI->getArgOperand(0); 3178341825Sdim Value *B = CI->getArgOperand(1); 3179341825Sdim Value *C = CI->getArgOperand(2); 3180341825Sdim 3181341825Sdim if (NegMul && (IsMask3 || IsMaskZ)) 3182341825Sdim A = Builder.CreateFNeg(A); 3183341825Sdim if (NegMul && !(IsMask3 || IsMaskZ)) 3184341825Sdim B = Builder.CreateFNeg(B); 3185341825Sdim if (NegAcc) 3186341825Sdim C = Builder.CreateFNeg(C); 3187341825Sdim 3188341825Sdim if (CI->getNumArgOperands() == 5 && 3189341825Sdim (!isa<ConstantInt>(CI->getArgOperand(4)) || 3190341825Sdim cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { 3191341825Sdim Intrinsic::ID IID; 3192341825Sdim // Check the character before ".512" in string. 3193341825Sdim if (Name[Name.size()-5] == 's') 3194341825Sdim IID = Intrinsic::x86_avx512_vfmadd_ps_512; 3195341825Sdim else 3196341825Sdim IID = Intrinsic::x86_avx512_vfmadd_pd_512; 3197341825Sdim 3198341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 3199341825Sdim { A, B, C, CI->getArgOperand(4) }); 3200341825Sdim } else { 3201341825Sdim Function *FMA = Intrinsic::getDeclaration(CI->getModule(), 3202341825Sdim Intrinsic::fma, 3203341825Sdim A->getType()); 3204341825Sdim Rep = Builder.CreateCall(FMA, { A, B, C }); 3205341825Sdim } 3206341825Sdim 3207341825Sdim Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) : 3208341825Sdim IsMask3 ? 
CI->getArgOperand(2) : 3209341825Sdim CI->getArgOperand(0); 3210341825Sdim 3211341825Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3212341825Sdim } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") || 3213341825Sdim Name.startswith("fma.vfmsubadd.p"))) { 3214341825Sdim bool IsSubAdd = Name[7] == 's'; 3215341825Sdim int NumElts = CI->getType()->getVectorNumElements(); 3216341825Sdim 3217341825Sdim Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3218341825Sdim CI->getArgOperand(2) }; 3219341825Sdim 3220341825Sdim Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, 3221341825Sdim Ops[0]->getType()); 3222341825Sdim Value *Odd = Builder.CreateCall(FMA, Ops); 3223341825Sdim Ops[2] = Builder.CreateFNeg(Ops[2]); 3224341825Sdim Value *Even = Builder.CreateCall(FMA, Ops); 3225341825Sdim 3226341825Sdim if (IsSubAdd) 3227341825Sdim std::swap(Even, Odd); 3228341825Sdim 3229341825Sdim SmallVector<uint32_t, 32> Idxs(NumElts); 3230341825Sdim for (int i = 0; i != NumElts; ++i) 3231341825Sdim Idxs[i] = i + (i % 2) * NumElts; 3232341825Sdim 3233341825Sdim Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); 3234341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") || 3235341825Sdim Name.startswith("avx512.mask3.vfmaddsub.p") || 3236341825Sdim Name.startswith("avx512.maskz.vfmaddsub.p") || 3237341825Sdim Name.startswith("avx512.mask3.vfmsubadd.p"))) { 3238341825Sdim bool IsMask3 = Name[11] == '3'; 3239341825Sdim bool IsMaskZ = Name[11] == 'z'; 3240341825Sdim // Drop the "avx512.mask." to make it easier. 3241341825Sdim Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 3242341825Sdim bool IsSubAdd = Name[3] == 's'; 3243341825Sdim if (CI->getNumArgOperands() == 5 && 3244341825Sdim (!isa<ConstantInt>(CI->getArgOperand(4)) || 3245341825Sdim cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { 3246341825Sdim Intrinsic::ID IID; 3247341825Sdim // Check the character before ".512" in string. 
3248341825Sdim if (Name[Name.size()-5] == 's') 3249341825Sdim IID = Intrinsic::x86_avx512_vfmaddsub_ps_512; 3250341825Sdim else 3251341825Sdim IID = Intrinsic::x86_avx512_vfmaddsub_pd_512; 3252341825Sdim 3253341825Sdim Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3254341825Sdim CI->getArgOperand(2), CI->getArgOperand(4) }; 3255341825Sdim if (IsSubAdd) 3256341825Sdim Ops[2] = Builder.CreateFNeg(Ops[2]); 3257341825Sdim 3258341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 3259341825Sdim {CI->getArgOperand(0), CI->getArgOperand(1), 3260341825Sdim CI->getArgOperand(2), CI->getArgOperand(4)}); 3261341825Sdim } else { 3262341825Sdim int NumElts = CI->getType()->getVectorNumElements(); 3263341825Sdim 3264341825Sdim Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3265341825Sdim CI->getArgOperand(2) }; 3266341825Sdim 3267341825Sdim Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, 3268341825Sdim Ops[0]->getType()); 3269341825Sdim Value *Odd = Builder.CreateCall(FMA, Ops); 3270341825Sdim Ops[2] = Builder.CreateFNeg(Ops[2]); 3271341825Sdim Value *Even = Builder.CreateCall(FMA, Ops); 3272341825Sdim 3273341825Sdim if (IsSubAdd) 3274341825Sdim std::swap(Even, Odd); 3275341825Sdim 3276341825Sdim SmallVector<uint32_t, 32> Idxs(NumElts); 3277341825Sdim for (int i = 0; i != NumElts; ++i) 3278341825Sdim Idxs[i] = i + (i % 2) * NumElts; 3279341825Sdim 3280341825Sdim Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); 3281341825Sdim } 3282341825Sdim 3283341825Sdim Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) : 3284341825Sdim IsMask3 ? 
CI->getArgOperand(2) : 3285341825Sdim CI->getArgOperand(0); 3286341825Sdim 3287341825Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3288341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") || 3289341825Sdim Name.startswith("avx512.maskz.pternlog."))) { 3290341825Sdim bool ZeroMask = Name[11] == 'z'; 3291341825Sdim unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3292341825Sdim unsigned EltWidth = CI->getType()->getScalarSizeInBits(); 3293341825Sdim Intrinsic::ID IID; 3294341825Sdim if (VecWidth == 128 && EltWidth == 32) 3295341825Sdim IID = Intrinsic::x86_avx512_pternlog_d_128; 3296341825Sdim else if (VecWidth == 256 && EltWidth == 32) 3297341825Sdim IID = Intrinsic::x86_avx512_pternlog_d_256; 3298341825Sdim else if (VecWidth == 512 && EltWidth == 32) 3299341825Sdim IID = Intrinsic::x86_avx512_pternlog_d_512; 3300341825Sdim else if (VecWidth == 128 && EltWidth == 64) 3301341825Sdim IID = Intrinsic::x86_avx512_pternlog_q_128; 3302341825Sdim else if (VecWidth == 256 && EltWidth == 64) 3303341825Sdim IID = Intrinsic::x86_avx512_pternlog_q_256; 3304341825Sdim else if (VecWidth == 512 && EltWidth == 64) 3305341825Sdim IID = Intrinsic::x86_avx512_pternlog_q_512; 3306341825Sdim else 3307341825Sdim llvm_unreachable("Unexpected intrinsic"); 3308341825Sdim 3309341825Sdim Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), 3310341825Sdim CI->getArgOperand(2), CI->getArgOperand(3) }; 3311341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3312341825Sdim Args); 3313341825Sdim Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(CI->getType()) 3314341825Sdim : CI->getArgOperand(0); 3315341825Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru); 3316341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") || 3317341825Sdim Name.startswith("avx512.maskz.vpmadd52"))) { 3318341825Sdim bool ZeroMask = Name[11] == 'z'; 3319341825Sdim bool High = Name[20] == 'h' || Name[21] == 'h'; 3320341825Sdim unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3321341825Sdim Intrinsic::ID IID; 3322341825Sdim if (VecWidth == 128 && !High) 3323341825Sdim IID = Intrinsic::x86_avx512_vpmadd52l_uq_128; 3324341825Sdim else if (VecWidth == 256 && !High) 3325341825Sdim IID = Intrinsic::x86_avx512_vpmadd52l_uq_256; 3326341825Sdim else if (VecWidth == 512 && !High) 3327341825Sdim IID = Intrinsic::x86_avx512_vpmadd52l_uq_512; 3328341825Sdim else if (VecWidth == 128 && High) 3329341825Sdim IID = Intrinsic::x86_avx512_vpmadd52h_uq_128; 3330341825Sdim else if (VecWidth == 256 && High) 3331341825Sdim IID = Intrinsic::x86_avx512_vpmadd52h_uq_256; 3332341825Sdim else if (VecWidth == 512 && High) 3333341825Sdim IID = Intrinsic::x86_avx512_vpmadd52h_uq_512; 3334341825Sdim else 3335341825Sdim llvm_unreachable("Unexpected intrinsic"); 3336341825Sdim 3337341825Sdim Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), 3338341825Sdim CI->getArgOperand(2) }; 3339341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3340341825Sdim Args); 3341341825Sdim Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(CI->getType()) 3342341825Sdim : CI->getArgOperand(0); 3343341825Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3344341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") || 3345341825Sdim Name.startswith("avx512.mask.vpermt2var.") || 3346341825Sdim Name.startswith("avx512.maskz.vpermt2var."))) { 3347341825Sdim bool ZeroMask = Name[11] == 'z'; 3348341825Sdim bool IndexForm = Name[17] == 'i'; 3349344779Sdim Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm); 3350341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") || 3351341825Sdim Name.startswith("avx512.maskz.vpdpbusd.") || 3352341825Sdim Name.startswith("avx512.mask.vpdpbusds.") || 3353341825Sdim Name.startswith("avx512.maskz.vpdpbusds."))) { 3354341825Sdim bool ZeroMask = Name[11] == 'z'; 3355341825Sdim bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; 3356341825Sdim unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3357341825Sdim Intrinsic::ID IID; 3358341825Sdim if (VecWidth == 128 && !IsSaturating) 3359341825Sdim IID = Intrinsic::x86_avx512_vpdpbusd_128; 3360341825Sdim else if (VecWidth == 256 && !IsSaturating) 3361341825Sdim IID = Intrinsic::x86_avx512_vpdpbusd_256; 3362341825Sdim else if (VecWidth == 512 && !IsSaturating) 3363341825Sdim IID = Intrinsic::x86_avx512_vpdpbusd_512; 3364341825Sdim else if (VecWidth == 128 && IsSaturating) 3365341825Sdim IID = Intrinsic::x86_avx512_vpdpbusds_128; 3366341825Sdim else if (VecWidth == 256 && IsSaturating) 3367341825Sdim IID = Intrinsic::x86_avx512_vpdpbusds_256; 3368341825Sdim else if (VecWidth == 512 && IsSaturating) 3369341825Sdim IID = Intrinsic::x86_avx512_vpdpbusds_512; 3370341825Sdim else 3371341825Sdim llvm_unreachable("Unexpected intrinsic"); 3372341825Sdim 3373341825Sdim Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3374341825Sdim CI->getArgOperand(2) }; 3375341825Sdim Rep = 
Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3376341825Sdim Args); 3377341825Sdim Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3378341825Sdim : CI->getArgOperand(0); 3379341825Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3380341825Sdim } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") || 3381341825Sdim Name.startswith("avx512.maskz.vpdpwssd.") || 3382341825Sdim Name.startswith("avx512.mask.vpdpwssds.") || 3383341825Sdim Name.startswith("avx512.maskz.vpdpwssds."))) { 3384341825Sdim bool ZeroMask = Name[11] == 'z'; 3385341825Sdim bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; 3386341825Sdim unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3387341825Sdim Intrinsic::ID IID; 3388341825Sdim if (VecWidth == 128 && !IsSaturating) 3389341825Sdim IID = Intrinsic::x86_avx512_vpdpwssd_128; 3390341825Sdim else if (VecWidth == 256 && !IsSaturating) 3391341825Sdim IID = Intrinsic::x86_avx512_vpdpwssd_256; 3392341825Sdim else if (VecWidth == 512 && !IsSaturating) 3393341825Sdim IID = Intrinsic::x86_avx512_vpdpwssd_512; 3394341825Sdim else if (VecWidth == 128 && IsSaturating) 3395341825Sdim IID = Intrinsic::x86_avx512_vpdpwssds_128; 3396341825Sdim else if (VecWidth == 256 && IsSaturating) 3397341825Sdim IID = Intrinsic::x86_avx512_vpdpwssds_256; 3398341825Sdim else if (VecWidth == 512 && IsSaturating) 3399341825Sdim IID = Intrinsic::x86_avx512_vpdpwssds_512; 3400341825Sdim else 3401341825Sdim llvm_unreachable("Unexpected intrinsic"); 3402341825Sdim 3403341825Sdim Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3404341825Sdim CI->getArgOperand(2) }; 3405341825Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3406341825Sdim Args); 3407341825Sdim Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(CI->getType()) 3408341825Sdim : CI->getArgOperand(0); 3409341825Sdim Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3410344779Sdim } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" || 3411344779Sdim Name == "addcarry.u32" || Name == "addcarry.u64" || 3412344779Sdim Name == "subborrow.u32" || Name == "subborrow.u64")) { 3413344779Sdim Intrinsic::ID IID; 3414344779Sdim if (Name[0] == 'a' && Name.back() == '2') 3415344779Sdim IID = Intrinsic::x86_addcarry_32; 3416344779Sdim else if (Name[0] == 'a' && Name.back() == '4') 3417344779Sdim IID = Intrinsic::x86_addcarry_64; 3418344779Sdim else if (Name[0] == 's' && Name.back() == '2') 3419344779Sdim IID = Intrinsic::x86_subborrow_32; 3420344779Sdim else if (Name[0] == 's' && Name.back() == '4') 3421344779Sdim IID = Intrinsic::x86_subborrow_64; 3422344779Sdim else 3423344779Sdim llvm_unreachable("Unexpected intrinsic"); 3424344779Sdim 3425344779Sdim // Make a call with 3 operands. 3426344779Sdim Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3427344779Sdim CI->getArgOperand(2)}; 3428344779Sdim Value *NewCall = Builder.CreateCall( 3429344779Sdim Intrinsic::getDeclaration(CI->getModule(), IID), 3430344779Sdim Args); 3431344779Sdim 3432344779Sdim // Extract the second result and store it. 3433344779Sdim Value *Data = Builder.CreateExtractValue(NewCall, 1); 3434344779Sdim // Cast the pointer to the right type. 3435344779Sdim Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3), 3436344779Sdim llvm::PointerType::getUnqual(Data->getType())); 3437344779Sdim Builder.CreateAlignedStore(Data, Ptr, 1); 3438344779Sdim // Replace the original call result with the first result of the new call. 
3439344779Sdim Value *CF = Builder.CreateExtractValue(NewCall, 0); 3440344779Sdim 3441344779Sdim CI->replaceAllUsesWith(CF); 3442344779Sdim Rep = nullptr; 3443341825Sdim } else if (IsX86 && Name.startswith("avx512.mask.") && 3444341825Sdim upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { 3445341825Sdim // Rep will be updated by the call in the condition. 3446321369Sdim } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { 3447321369Sdim Value *Arg = CI->getArgOperand(0); 3448321369Sdim Value *Neg = Builder.CreateNeg(Arg, "neg"); 3449321369Sdim Value *Cmp = Builder.CreateICmpSGE( 3450321369Sdim Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); 3451321369Sdim Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); 3452353358Sdim } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") || 3453353358Sdim Name.startswith("atomic.load.add.f64.p"))) { 3454353358Sdim Value *Ptr = CI->getArgOperand(0); 3455353358Sdim Value *Val = CI->getArgOperand(1); 3456353358Sdim Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, 3457353358Sdim AtomicOrdering::SequentiallyConsistent); 3458321369Sdim } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" || 3459321369Sdim Name == "max.ui" || Name == "max.ull")) { 3460321369Sdim Value *Arg0 = CI->getArgOperand(0); 3461321369Sdim Value *Arg1 = CI->getArgOperand(1); 3462321369Sdim Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") 3463321369Sdim ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") 3464321369Sdim : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); 3465321369Sdim Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); 3466321369Sdim } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" || 3467321369Sdim Name == "min.ui" || Name == "min.ull")) { 3468321369Sdim Value *Arg0 = CI->getArgOperand(0); 3469321369Sdim Value *Arg1 = CI->getArgOperand(1); 3470321369Sdim Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") 3471321369Sdim ? 
Builder.CreateICmpULE(Arg0, Arg1, "min.cond") 3472321369Sdim : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); 3473321369Sdim Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); 3474321369Sdim } else if (IsNVVM && Name == "clz.ll") { 3475321369Sdim // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64. 3476321369Sdim Value *Arg = CI->getArgOperand(0); 3477321369Sdim Value *Ctlz = Builder.CreateCall( 3478321369Sdim Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 3479321369Sdim {Arg->getType()}), 3480321369Sdim {Arg, Builder.getFalse()}, "ctlz"); 3481321369Sdim Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); 3482321369Sdim } else if (IsNVVM && Name == "popc.ll") { 3483321369Sdim // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an 3484321369Sdim // i64. 3485321369Sdim Value *Arg = CI->getArgOperand(0); 3486321369Sdim Value *Popc = Builder.CreateCall( 3487321369Sdim Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 3488321369Sdim {Arg->getType()}), 3489321369Sdim Arg, "ctpop"); 3490321369Sdim Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); 3491321369Sdim } else if (IsNVVM && Name == "h2f") { 3492321369Sdim Rep = Builder.CreateCall(Intrinsic::getDeclaration( 3493321369Sdim F->getParent(), Intrinsic::convert_from_fp16, 3494321369Sdim {Builder.getFloatTy()}), 3495321369Sdim CI->getArgOperand(0), "h2f"); 3496309124Sdim } else { 3497309124Sdim llvm_unreachable("Unknown function for CallInst upgrade."); 3498249259Sdim } 3499249259Sdim 3500309124Sdim if (Rep) 3501309124Sdim CI->replaceAllUsesWith(Rep); 3502249259Sdim CI->eraseFromParent(); 3503249259Sdim return; 3504249259Sdim } 3505249259Sdim 3506341825Sdim const auto &DefaultCase = [&NewFn, &CI]() -> void { 3507321369Sdim // Handle generic mangling change, but nothing else 3508321369Sdim assert( 3509321369Sdim (CI->getCalledFunction()->getName() != NewFn->getName()) && 3510321369Sdim "Unknown function for CallInst upgrade 
and isn't just a name change"); 3511321369Sdim CI->setCalledFunction(NewFn); 3512341825Sdim }; 3513341825Sdim CallInst *NewCall = nullptr; 3514341825Sdim switch (NewFn->getIntrinsicID()) { 3515341825Sdim default: { 3516341825Sdim DefaultCase(); 3517321369Sdim return; 3518321369Sdim } 3519353358Sdim case Intrinsic::experimental_vector_reduce_v2_fmul: { 3520353358Sdim SmallVector<Value *, 2> Args; 3521353358Sdim if (CI->isFast()) 3522353358Sdim Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0)); 3523353358Sdim else 3524353358Sdim Args.push_back(CI->getOperand(0)); 3525353358Sdim Args.push_back(CI->getOperand(1)); 3526353358Sdim NewCall = Builder.CreateCall(NewFn, Args); 3527353358Sdim cast<Instruction>(NewCall)->copyFastMathFlags(CI); 3528353358Sdim break; 3529353358Sdim } 3530353358Sdim case Intrinsic::experimental_vector_reduce_v2_fadd: { 3531353358Sdim SmallVector<Value *, 2> Args; 3532353358Sdim if (CI->isFast()) 3533353358Sdim Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType())); 3534353358Sdim else 3535353358Sdim Args.push_back(CI->getOperand(0)); 3536353358Sdim Args.push_back(CI->getOperand(1)); 3537353358Sdim NewCall = Builder.CreateCall(NewFn, Args); 3538353358Sdim cast<Instruction>(NewCall)->copyFastMathFlags(CI); 3539353358Sdim break; 3540353358Sdim } 3541296417Sdim case Intrinsic::arm_neon_vld1: 3542296417Sdim case Intrinsic::arm_neon_vld2: 3543296417Sdim case Intrinsic::arm_neon_vld3: 3544296417Sdim case Intrinsic::arm_neon_vld4: 3545296417Sdim case Intrinsic::arm_neon_vld2lane: 3546296417Sdim case Intrinsic::arm_neon_vld3lane: 3547296417Sdim case Intrinsic::arm_neon_vld4lane: 3548296417Sdim case Intrinsic::arm_neon_vst1: 3549296417Sdim case Intrinsic::arm_neon_vst2: 3550296417Sdim case Intrinsic::arm_neon_vst3: 3551296417Sdim case Intrinsic::arm_neon_vst4: 3552296417Sdim case Intrinsic::arm_neon_vst2lane: 3553296417Sdim case Intrinsic::arm_neon_vst3lane: 3554296417Sdim case Intrinsic::arm_neon_vst4lane: { 3555296417Sdim 
SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 3556296417Sdim CI->arg_operands().end()); 3557321369Sdim NewCall = Builder.CreateCall(NewFn, Args); 3558321369Sdim break; 3559296417Sdim } 3560296417Sdim 3561314564Sdim case Intrinsic::bitreverse: 3562321369Sdim NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 3563321369Sdim break; 3564314564Sdim 3565249259Sdim case Intrinsic::ctlz: 3566249259Sdim case Intrinsic::cttz: 3567249259Sdim assert(CI->getNumArgOperands() == 1 && 3568249259Sdim "Mismatch between function args and call args"); 3569321369Sdim NewCall = 3570321369Sdim Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()}); 3571321369Sdim break; 3572249259Sdim 3573321369Sdim case Intrinsic::objectsize: { 3574321369Sdim Value *NullIsUnknownSize = CI->getNumArgOperands() == 2 3575321369Sdim ? Builder.getFalse() 3576321369Sdim : CI->getArgOperand(2); 3577353358Sdim Value *Dynamic = 3578353358Sdim CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3); 3579321369Sdim NewCall = Builder.CreateCall( 3580353358Sdim NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic}); 3581321369Sdim break; 3582249259Sdim } 3583249259Sdim 3584321369Sdim case Intrinsic::ctpop: 3585321369Sdim NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 3586321369Sdim break; 3587321369Sdim 3588321369Sdim case Intrinsic::convert_from_fp16: 3589321369Sdim NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 3590321369Sdim break; 3591321369Sdim 3592327952Sdim case Intrinsic::dbg_value: 3593327952Sdim // Upgrade from the old version that had an extra offset argument. 3594327952Sdim assert(CI->getNumArgOperands() == 4); 3595327952Sdim // Drop nonzero offsets instead of attempting to upgrade them. 
3596327952Sdim if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1))) 3597327952Sdim if (Offset->isZeroValue()) { 3598327952Sdim NewCall = Builder.CreateCall( 3599327952Sdim NewFn, 3600327952Sdim {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)}); 3601327952Sdim break; 3602327952Sdim } 3603327952Sdim CI->eraseFromParent(); 3604327952Sdim return; 3605327952Sdim 3606249259Sdim case Intrinsic::x86_xop_vfrcz_ss: 3607249259Sdim case Intrinsic::x86_xop_vfrcz_sd: 3608321369Sdim NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)}); 3609321369Sdim break; 3610249259Sdim 3611309124Sdim case Intrinsic::x86_xop_vpermil2pd: 3612309124Sdim case Intrinsic::x86_xop_vpermil2ps: 3613309124Sdim case Intrinsic::x86_xop_vpermil2pd_256: 3614309124Sdim case Intrinsic::x86_xop_vpermil2ps_256: { 3615309124Sdim SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 3616309124Sdim CI->arg_operands().end()); 3617309124Sdim VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType()); 3618309124Sdim VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy); 3619309124Sdim Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy); 3620321369Sdim NewCall = Builder.CreateCall(NewFn, Args); 3621321369Sdim break; 3622309124Sdim } 3623309124Sdim 3624249259Sdim case Intrinsic::x86_sse41_ptestc: 3625249259Sdim case Intrinsic::x86_sse41_ptestz: 3626249259Sdim case Intrinsic::x86_sse41_ptestnzc: { 3627249259Sdim // The arguments for these intrinsics used to be v4f32, and changed 3628249259Sdim // to v2i64. This is purely a nop, since those are bitwise intrinsics. 3629249259Sdim // So, the only thing required is a bitcast for both arguments. 3630249259Sdim // First, check the arguments have the old type. 
3631249259Sdim Value *Arg0 = CI->getArgOperand(0); 3632249259Sdim if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 3633249259Sdim return; 3634249259Sdim 3635249259Sdim // Old intrinsic, add bitcasts 3636249259Sdim Value *Arg1 = CI->getArgOperand(1); 3637249259Sdim 3638288943Sdim Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 3639249259Sdim 3640288943Sdim Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 3641288943Sdim Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 3642288943Sdim 3643321369Sdim NewCall = Builder.CreateCall(NewFn, {BC0, BC1}); 3644321369Sdim break; 3645249259Sdim } 3646280031Sdim 3647344779Sdim case Intrinsic::x86_rdtscp: { 3648344779Sdim // This used to take 1 arguments. If we have no arguments, it is already 3649344779Sdim // upgraded. 3650344779Sdim if (CI->getNumOperands() == 0) 3651344779Sdim return; 3652344779Sdim 3653344779Sdim NewCall = Builder.CreateCall(NewFn); 3654344779Sdim // Extract the second result and store it. 3655344779Sdim Value *Data = Builder.CreateExtractValue(NewCall, 1); 3656344779Sdim // Cast the pointer to the right type. 3657344779Sdim Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0), 3658344779Sdim llvm::PointerType::getUnqual(Data->getType())); 3659344779Sdim Builder.CreateAlignedStore(Data, Ptr, 1); 3660344779Sdim // Replace the original call result with the first result of the new call. 
3661344779Sdim Value *TSC = Builder.CreateExtractValue(NewCall, 0); 3662344779Sdim 3663344779Sdim std::string Name = CI->getName(); 3664344779Sdim if (!Name.empty()) { 3665344779Sdim CI->setName(Name + ".old"); 3666344779Sdim NewCall->setName(Name); 3667344779Sdim } 3668344779Sdim CI->replaceAllUsesWith(TSC); 3669344779Sdim CI->eraseFromParent(); 3670344779Sdim return; 3671344779Sdim } 3672344779Sdim 3673280031Sdim case Intrinsic::x86_sse41_insertps: 3674280031Sdim case Intrinsic::x86_sse41_dppd: 3675280031Sdim case Intrinsic::x86_sse41_dpps: 3676280031Sdim case Intrinsic::x86_sse41_mpsadbw: 3677280031Sdim case Intrinsic::x86_avx_dp_ps_256: 3678280031Sdim case Intrinsic::x86_avx2_mpsadbw: { 3679280031Sdim // Need to truncate the last argument from i32 to i8 -- this argument models 3680280031Sdim // an inherently 8-bit immediate operand to these x86 instructions. 3681280031Sdim SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 3682280031Sdim CI->arg_operands().end()); 3683280031Sdim 3684280031Sdim // Replace the last argument with a trunc. 
3685280031Sdim Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 3686321369Sdim NewCall = Builder.CreateCall(NewFn, Args); 3687321369Sdim break; 3688249259Sdim } 3689309124Sdim 3690309124Sdim case Intrinsic::thread_pointer: { 3691321369Sdim NewCall = Builder.CreateCall(NewFn, {}); 3692321369Sdim break; 3693280031Sdim } 3694309124Sdim 3695314564Sdim case Intrinsic::invariant_start: 3696314564Sdim case Intrinsic::invariant_end: 3697309124Sdim case Intrinsic::masked_load: 3698321369Sdim case Intrinsic::masked_store: 3699321369Sdim case Intrinsic::masked_gather: 3700321369Sdim case Intrinsic::masked_scatter: { 3701309124Sdim SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 3702309124Sdim CI->arg_operands().end()); 3703321369Sdim NewCall = Builder.CreateCall(NewFn, Args); 3704321369Sdim break; 3705309124Sdim } 3706341825Sdim 3707341825Sdim case Intrinsic::memcpy: 3708341825Sdim case Intrinsic::memmove: 3709341825Sdim case Intrinsic::memset: { 3710341825Sdim // We have to make sure that the call signature is what we're expecting. 3711341825Sdim // We only want to change the old signatures by removing the alignment arg: 3712341825Sdim // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) 3713341825Sdim // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) 3714341825Sdim // @llvm.memset...(i8*, i8, i[32|64], i32, i1) 3715341825Sdim // -> @llvm.memset...(i8*, i8, i[32|64], i1) 3716341825Sdim // Note: i8*'s in the above can be any pointer type 3717341825Sdim if (CI->getNumArgOperands() != 5) { 3718341825Sdim DefaultCase(); 3719341825Sdim return; 3720341825Sdim } 3721341825Sdim // Remove alignment argument (3), and add alignment attributes to the 3722341825Sdim // dest/src pointers. 
3723341825Sdim Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1), 3724341825Sdim CI->getArgOperand(2), CI->getArgOperand(4)}; 3725341825Sdim NewCall = Builder.CreateCall(NewFn, Args); 3726341825Sdim auto *MemCI = cast<MemIntrinsic>(NewCall); 3727341825Sdim // All mem intrinsics support dest alignment. 3728341825Sdim const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3)); 3729341825Sdim MemCI->setDestAlignment(Align->getZExtValue()); 3730341825Sdim // Memcpy/Memmove also support source alignment. 3731341825Sdim if (auto *MTI = dyn_cast<MemTransferInst>(MemCI)) 3732341825Sdim MTI->setSourceAlignment(Align->getZExtValue()); 3733341825Sdim break; 3734309124Sdim } 3735341825Sdim } 3736321369Sdim assert(NewCall && "Should have either set this variable or returned through " 3737321369Sdim "the default case"); 3738321369Sdim std::string Name = CI->getName(); 3739321369Sdim if (!Name.empty()) { 3740321369Sdim CI->setName(Name + ".old"); 3741321369Sdim NewCall->setName(Name); 3742321369Sdim } 3743321369Sdim CI->replaceAllUsesWith(NewCall); 3744321369Sdim CI->eraseFromParent(); 3745249259Sdim} 3746249259Sdim 3747309124Sdimvoid llvm::UpgradeCallsToIntrinsic(Function *F) { 3748249259Sdim assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 3749249259Sdim 3750309124Sdim // Check if this function should be upgraded and get the replacement function 3751309124Sdim // if there is one. 3752249259Sdim Function *NewFn; 3753249259Sdim if (UpgradeIntrinsicFunction(F, NewFn)) { 3754309124Sdim // Replace all users of the old function with the new function or new 3755309124Sdim // instructions. This is not a range loop because the call is deleted. 3756309124Sdim for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; ) 3757288943Sdim if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 3758288943Sdim UpgradeIntrinsicCall(CI, NewFn); 3759309124Sdim 3760288943Sdim // Remove old function, no longer used, from the module. 
3761288943Sdim F->eraseFromParent(); 3762249259Sdim } 3763249259Sdim} 3764249259Sdim 3765314564SdimMDNode *llvm::UpgradeTBAANode(MDNode &MD) { 3766261991Sdim // Check if the tag uses struct-path aware TBAA format. 3767314564Sdim if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3) 3768314564Sdim return &MD; 3769261991Sdim 3770314564Sdim auto &Context = MD.getContext(); 3771314564Sdim if (MD.getNumOperands() == 3) { 3772314564Sdim Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)}; 3773314564Sdim MDNode *ScalarType = MDNode::get(Context, Elts); 3774261991Sdim // Create a MDNode <ScalarType, ScalarType, offset 0, const> 3775280031Sdim Metadata *Elts2[] = {ScalarType, ScalarType, 3776314564Sdim ConstantAsMetadata::get( 3777314564Sdim Constant::getNullValue(Type::getInt64Ty(Context))), 3778314564Sdim MD.getOperand(2)}; 3779314564Sdim return MDNode::get(Context, Elts2); 3780261991Sdim } 3781314564Sdim // Create a MDNode <MD, MD, offset 0> 3782314564Sdim Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue( 3783314564Sdim Type::getInt64Ty(Context)))}; 3784314564Sdim return MDNode::get(Context, Elts); 3785261991Sdim} 3786261991Sdim 3787261991SdimInstruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 3788261991Sdim Instruction *&Temp) { 3789261991Sdim if (Opc != Instruction::BitCast) 3790276479Sdim return nullptr; 3791261991Sdim 3792276479Sdim Temp = nullptr; 3793261991Sdim Type *SrcTy = V->getType(); 3794261991Sdim if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 3795261991Sdim SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 3796261991Sdim LLVMContext &Context = V->getContext(); 3797261991Sdim 3798261991Sdim // We have no information about target data layout, so we assume that 3799261991Sdim // the maximum pointer size is 64bit. 
3800261991Sdim Type *MidTy = Type::getInt64Ty(Context); 3801261991Sdim Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); 3802261991Sdim 3803261991Sdim return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); 3804261991Sdim } 3805261991Sdim 3806276479Sdim return nullptr; 3807261991Sdim} 3808261991Sdim 3809261991SdimValue *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { 3810261991Sdim if (Opc != Instruction::BitCast) 3811276479Sdim return nullptr; 3812261991Sdim 3813261991Sdim Type *SrcTy = C->getType(); 3814261991Sdim if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 3815261991Sdim SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 3816261991Sdim LLVMContext &Context = C->getContext(); 3817261991Sdim 3818261991Sdim // We have no information about target data layout, so we assume that 3819261991Sdim // the maximum pointer size is 64bit. 3820261991Sdim Type *MidTy = Type::getInt64Ty(Context); 3821261991Sdim 3822261991Sdim return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), 3823261991Sdim DestTy); 3824261991Sdim } 3825261991Sdim 3826276479Sdim return nullptr; 3827261991Sdim} 3828261991Sdim 3829261991Sdim/// Check the debug info version number, if it is out-dated, drop the debug 3830261991Sdim/// info. Return true if module is modified. 3831261991Sdimbool llvm::UpgradeDebugInfo(Module &M) { 3832276479Sdim unsigned Version = getDebugMetadataVersionFromModule(M); 3833327952Sdim if (Version == DEBUG_METADATA_VERSION) { 3834327952Sdim bool BrokenDebugInfo = false; 3835327952Sdim if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo)) 3836327952Sdim report_fatal_error("Broken module found, compilation aborted!"); 3837327952Sdim if (!BrokenDebugInfo) 3838327952Sdim // Everything is ok. 3839327952Sdim return false; 3840327952Sdim else { 3841327952Sdim // Diagnose malformed debug info. 
3842327952Sdim DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); 3843327952Sdim M.getContext().diagnose(Diag); 3844327952Sdim } 3845327952Sdim } 3846327952Sdim bool Modified = StripDebugInfo(M); 3847327952Sdim if (Modified && Version != DEBUG_METADATA_VERSION) { 3848327952Sdim // Diagnose a version mismatch. 3849276479Sdim DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 3850276479Sdim M.getContext().diagnose(DiagVersion); 3851276479Sdim } 3852327952Sdim return Modified; 3853261991Sdim} 3854276479Sdim 3855360784Sdim/// This checks for objc retain release marker which should be upgraded. It 3856360784Sdim/// returns true if module is modified. 3857360784Sdimstatic bool UpgradeRetainReleaseMarker(Module &M) { 3858341825Sdim bool Changed = false; 3859353358Sdim const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; 3860353358Sdim NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey); 3861341825Sdim if (ModRetainReleaseMarker) { 3862341825Sdim MDNode *Op = ModRetainReleaseMarker->getOperand(0); 3863341825Sdim if (Op) { 3864341825Sdim MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0)); 3865341825Sdim if (ID) { 3866341825Sdim SmallVector<StringRef, 4> ValueComp; 3867341825Sdim ID->getString().split(ValueComp, "#"); 3868341825Sdim if (ValueComp.size() == 2) { 3869341825Sdim std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str(); 3870353358Sdim ID = MDString::get(M.getContext(), NewValue); 3871341825Sdim } 3872353358Sdim M.addModuleFlag(Module::Error, MarkerKey, ID); 3873353358Sdim M.eraseNamedMetadata(ModRetainReleaseMarker); 3874353358Sdim Changed = true; 3875341825Sdim } 3876341825Sdim } 3877341825Sdim } 3878341825Sdim return Changed; 3879341825Sdim} 3880341825Sdim 3881360784Sdimvoid llvm::UpgradeARCRuntime(Module &M) { 3882360784Sdim // This lambda converts normal function calls to ARC runtime functions to 3883360784Sdim // intrinsic calls. 
3884360784Sdim auto UpgradeToIntrinsic = [&](const char *OldFunc, 3885360784Sdim llvm::Intrinsic::ID IntrinsicFunc) { 3886360784Sdim Function *Fn = M.getFunction(OldFunc); 3887360784Sdim 3888360784Sdim if (!Fn) 3889360784Sdim return; 3890360784Sdim 3891360784Sdim Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc); 3892360784Sdim 3893360784Sdim for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) { 3894360784Sdim CallInst *CI = dyn_cast<CallInst>(*I++); 3895360784Sdim if (!CI || CI->getCalledFunction() != Fn) 3896360784Sdim continue; 3897360784Sdim 3898360784Sdim IRBuilder<> Builder(CI->getParent(), CI->getIterator()); 3899360784Sdim FunctionType *NewFuncTy = NewFn->getFunctionType(); 3900360784Sdim SmallVector<Value *, 2> Args; 3901360784Sdim 3902360784Sdim // Don't upgrade the intrinsic if it's not valid to bitcast the return 3903360784Sdim // value to the return type of the old function. 3904360784Sdim if (NewFuncTy->getReturnType() != CI->getType() && 3905360784Sdim !CastInst::castIsValid(Instruction::BitCast, CI, 3906360784Sdim NewFuncTy->getReturnType())) 3907360784Sdim continue; 3908360784Sdim 3909360784Sdim bool InvalidCast = false; 3910360784Sdim 3911360784Sdim for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) { 3912360784Sdim Value *Arg = CI->getArgOperand(I); 3913360784Sdim 3914360784Sdim // Bitcast argument to the parameter type of the new function if it's 3915360784Sdim // not a variadic argument. 3916360784Sdim if (I < NewFuncTy->getNumParams()) { 3917360784Sdim // Don't upgrade the intrinsic if it's not valid to bitcast the argument 3918360784Sdim // to the parameter type of the new function. 
3919360784Sdim if (!CastInst::castIsValid(Instruction::BitCast, Arg, 3920360784Sdim NewFuncTy->getParamType(I))) { 3921360784Sdim InvalidCast = true; 3922360784Sdim break; 3923360784Sdim } 3924360784Sdim Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I)); 3925360784Sdim } 3926360784Sdim Args.push_back(Arg); 3927360784Sdim } 3928360784Sdim 3929360784Sdim if (InvalidCast) 3930360784Sdim continue; 3931360784Sdim 3932360784Sdim // Create a call instruction that calls the new function. 3933360784Sdim CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args); 3934360784Sdim NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind()); 3935360784Sdim NewCall->setName(CI->getName()); 3936360784Sdim 3937360784Sdim // Bitcast the return value back to the type of the old call. 3938360784Sdim Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType()); 3939360784Sdim 3940360784Sdim if (!CI->use_empty()) 3941360784Sdim CI->replaceAllUsesWith(NewRetVal); 3942360784Sdim CI->eraseFromParent(); 3943360784Sdim } 3944360784Sdim 3945360784Sdim if (Fn->use_empty()) 3946360784Sdim Fn->eraseFromParent(); 3947360784Sdim }; 3948360784Sdim 3949360784Sdim // Unconditionally convert a call to "clang.arc.use" to a call to 3950360784Sdim // "llvm.objc.clang.arc.use". 3951360784Sdim UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use); 3952360784Sdim 3953360784Sdim // Upgrade the retain release marker. If there is no need to upgrade 3954360784Sdim // the marker, that means either the module is already new enough to contain 3955360784Sdim // new intrinsics or it is not ARC. There is no need to upgrade runtime call. 
3956360784Sdim if (!UpgradeRetainReleaseMarker(M)) 3957360784Sdim return; 3958360784Sdim 3959360784Sdim std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = { 3960360784Sdim {"objc_autorelease", llvm::Intrinsic::objc_autorelease}, 3961360784Sdim {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop}, 3962360784Sdim {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush}, 3963360784Sdim {"objc_autoreleaseReturnValue", 3964360784Sdim llvm::Intrinsic::objc_autoreleaseReturnValue}, 3965360784Sdim {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak}, 3966360784Sdim {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak}, 3967360784Sdim {"objc_initWeak", llvm::Intrinsic::objc_initWeak}, 3968360784Sdim {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak}, 3969360784Sdim {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained}, 3970360784Sdim {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak}, 3971360784Sdim {"objc_release", llvm::Intrinsic::objc_release}, 3972360784Sdim {"objc_retain", llvm::Intrinsic::objc_retain}, 3973360784Sdim {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease}, 3974360784Sdim {"objc_retainAutoreleaseReturnValue", 3975360784Sdim llvm::Intrinsic::objc_retainAutoreleaseReturnValue}, 3976360784Sdim {"objc_retainAutoreleasedReturnValue", 3977360784Sdim llvm::Intrinsic::objc_retainAutoreleasedReturnValue}, 3978360784Sdim {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock}, 3979360784Sdim {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong}, 3980360784Sdim {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak}, 3981360784Sdim {"objc_unsafeClaimAutoreleasedReturnValue", 3982360784Sdim llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue}, 3983360784Sdim {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject}, 3984360784Sdim {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject}, 3985360784Sdim {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer}, 3986360784Sdim 
{"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease}, 3987360784Sdim {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter}, 3988360784Sdim {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit}, 3989360784Sdim {"objc_arc_annotation_topdown_bbstart", 3990360784Sdim llvm::Intrinsic::objc_arc_annotation_topdown_bbstart}, 3991360784Sdim {"objc_arc_annotation_topdown_bbend", 3992360784Sdim llvm::Intrinsic::objc_arc_annotation_topdown_bbend}, 3993360784Sdim {"objc_arc_annotation_bottomup_bbstart", 3994360784Sdim llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart}, 3995360784Sdim {"objc_arc_annotation_bottomup_bbend", 3996360784Sdim llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}}; 3997360784Sdim 3998360784Sdim for (auto &I : RuntimeFuncs) 3999360784Sdim UpgradeToIntrinsic(I.first, I.second); 4000360784Sdim} 4001360784Sdim 4002309124Sdimbool llvm::UpgradeModuleFlags(Module &M) { 4003322855Sdim NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); 4004309124Sdim if (!ModFlags) 4005309124Sdim return false; 4006309124Sdim 4007322855Sdim bool HasObjCFlag = false, HasClassProperties = false, Changed = false; 4008309124Sdim for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { 4009309124Sdim MDNode *Op = ModFlags->getOperand(I); 4010322855Sdim if (Op->getNumOperands() != 3) 4011309124Sdim continue; 4012309124Sdim MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1)); 4013309124Sdim if (!ID) 4014309124Sdim continue; 4015309124Sdim if (ID->getString() == "Objective-C Image Info Version") 4016309124Sdim HasObjCFlag = true; 4017309124Sdim if (ID->getString() == "Objective-C Class Properties") 4018309124Sdim HasClassProperties = true; 4019322855Sdim // Upgrade PIC/PIE Module Flags. The module flag behavior for these two 4020322855Sdim // field was Error and now they are Max. 
4021322855Sdim if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") { 4022322855Sdim if (auto *Behavior = 4023322855Sdim mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) { 4024322855Sdim if (Behavior->getLimitedValue() == Module::Error) { 4025322855Sdim Type *Int32Ty = Type::getInt32Ty(M.getContext()); 4026322855Sdim Metadata *Ops[3] = { 4027322855Sdim ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)), 4028322855Sdim MDString::get(M.getContext(), ID->getString()), 4029322855Sdim Op->getOperand(2)}; 4030322855Sdim ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops)); 4031322855Sdim Changed = true; 4032322855Sdim } 4033322855Sdim } 4034322855Sdim } 4035326496Sdim // Upgrade Objective-C Image Info Section. Removed the whitespce in the 4036326496Sdim // section name so that llvm-lto will not complain about mismatching 4037326496Sdim // module flags that is functionally the same. 4038326496Sdim if (ID->getString() == "Objective-C Image Info Section") { 4039326496Sdim if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) { 4040326496Sdim SmallVector<StringRef, 4> ValueComp; 4041326496Sdim Value->getString().split(ValueComp, " "); 4042326496Sdim if (ValueComp.size() != 1) { 4043326496Sdim std::string NewValue; 4044326496Sdim for (auto &S : ValueComp) 4045326496Sdim NewValue += S.str(); 4046326496Sdim Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1), 4047326496Sdim MDString::get(M.getContext(), NewValue)}; 4048326496Sdim ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops)); 4049326496Sdim Changed = true; 4050326496Sdim } 4051326496Sdim } 4052326496Sdim } 4053276479Sdim } 4054322855Sdim 4055309124Sdim // "Objective-C Class Properties" is recently added for Objective-C. 
We 4056309124Sdim // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module 4057314564Sdim // flag of value 0, so we can correclty downgrade this flag when trying to 4058314564Sdim // link an ObjC bitcode without this module flag with an ObjC bitcode with 4059314564Sdim // this module flag. 4060309124Sdim if (HasObjCFlag && !HasClassProperties) { 4061314564Sdim M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties", 4062309124Sdim (uint32_t)0); 4063322855Sdim Changed = true; 4064309124Sdim } 4065322855Sdim 4066322855Sdim return Changed; 4067276479Sdim} 4068309124Sdim 4069326496Sdimvoid llvm::UpgradeSectionAttributes(Module &M) { 4070326496Sdim auto TrimSpaces = [](StringRef Section) -> std::string { 4071326496Sdim SmallVector<StringRef, 5> Components; 4072326496Sdim Section.split(Components, ','); 4073326496Sdim 4074326496Sdim SmallString<32> Buffer; 4075326496Sdim raw_svector_ostream OS(Buffer); 4076326496Sdim 4077326496Sdim for (auto Component : Components) 4078326496Sdim OS << ',' << Component.trim(); 4079326496Sdim 4080326496Sdim return OS.str().substr(1); 4081326496Sdim }; 4082326496Sdim 4083326496Sdim for (auto &GV : M.globals()) { 4084326496Sdim if (!GV.hasSection()) 4085326496Sdim continue; 4086326496Sdim 4087326496Sdim StringRef Section = GV.getSection(); 4088326496Sdim 4089326496Sdim if (!Section.startswith("__DATA, __objc_catlist")) 4090326496Sdim continue; 4091326496Sdim 4092326496Sdim // __DATA, __objc_catlist, regular, no_dead_strip 4093326496Sdim // __DATA,__objc_catlist,regular,no_dead_strip 4094326496Sdim GV.setSection(TrimSpaces(Section)); 4095326496Sdim } 4096326496Sdim} 4097326496Sdim 4098309124Sdimstatic bool isOldLoopArgument(Metadata *MD) { 4099309124Sdim auto *T = dyn_cast_or_null<MDTuple>(MD); 4100309124Sdim if (!T) 4101309124Sdim return false; 4102309124Sdim if (T->getNumOperands() < 1) 4103309124Sdim return false; 4104309124Sdim auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); 4105309124Sdim if 
(!S) 4106309124Sdim return false; 4107309124Sdim return S->getString().startswith("llvm.vectorizer."); 4108309124Sdim} 4109309124Sdim 4110309124Sdimstatic MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { 4111309124Sdim StringRef OldPrefix = "llvm.vectorizer."; 4112309124Sdim assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); 4113309124Sdim 4114309124Sdim if (OldTag == "llvm.vectorizer.unroll") 4115309124Sdim return MDString::get(C, "llvm.loop.interleave.count"); 4116309124Sdim 4117309124Sdim return MDString::get( 4118309124Sdim C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) 4119309124Sdim .str()); 4120309124Sdim} 4121309124Sdim 4122309124Sdimstatic Metadata *upgradeLoopArgument(Metadata *MD) { 4123309124Sdim auto *T = dyn_cast_or_null<MDTuple>(MD); 4124309124Sdim if (!T) 4125309124Sdim return MD; 4126309124Sdim if (T->getNumOperands() < 1) 4127309124Sdim return MD; 4128309124Sdim auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); 4129309124Sdim if (!OldTag) 4130309124Sdim return MD; 4131309124Sdim if (!OldTag->getString().startswith("llvm.vectorizer.")) 4132309124Sdim return MD; 4133309124Sdim 4134309124Sdim // This has an old tag. Upgrade it. 
4135309124Sdim SmallVector<Metadata *, 8> Ops; 4136309124Sdim Ops.reserve(T->getNumOperands()); 4137309124Sdim Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 4138309124Sdim for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 4139309124Sdim Ops.push_back(T->getOperand(I)); 4140309124Sdim 4141309124Sdim return MDTuple::get(T->getContext(), Ops); 4142309124Sdim} 4143309124Sdim 4144309124SdimMDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 4145309124Sdim auto *T = dyn_cast<MDTuple>(&N); 4146309124Sdim if (!T) 4147309124Sdim return &N; 4148309124Sdim 4149314564Sdim if (none_of(T->operands(), isOldLoopArgument)) 4150309124Sdim return &N; 4151309124Sdim 4152309124Sdim SmallVector<Metadata *, 8> Ops; 4153309124Sdim Ops.reserve(T->getNumOperands()); 4154309124Sdim for (Metadata *MD : T->operands()) 4155309124Sdim Ops.push_back(upgradeLoopArgument(MD)); 4156309124Sdim 4157309124Sdim return MDTuple::get(T->getContext(), Ops); 4158309124Sdim} 4159360784Sdim 4160360784Sdimstd::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { 4161360784Sdim std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64"; 4162360784Sdim 4163360784Sdim // If X86, and the datalayout matches the expected format, add pointer size 4164360784Sdim // address spaces to the datalayout. 
4165360784Sdim if (!Triple(TT).isX86() || DL.contains(AddrSpaces)) 4166360784Sdim return DL; 4167360784Sdim 4168360784Sdim SmallVector<StringRef, 4> Groups; 4169360784Sdim Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)"); 4170360784Sdim if (!R.match(DL, &Groups)) 4171360784Sdim return DL; 4172360784Sdim 4173360784Sdim SmallString<1024> Buf; 4174360784Sdim std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str(); 4175360784Sdim return Res; 4176360784Sdim} 4177360784Sdim 4178360784Sdimvoid llvm::UpgradeFramePointerAttributes(AttrBuilder &B) { 4179360784Sdim StringRef FramePointer; 4180360784Sdim if (B.contains("no-frame-pointer-elim")) { 4181360784Sdim // The value can be "true" or "false". 4182360784Sdim for (const auto &I : B.td_attrs()) 4183360784Sdim if (I.first == "no-frame-pointer-elim") 4184360784Sdim FramePointer = I.second == "true" ? "all" : "none"; 4185360784Sdim B.removeAttribute("no-frame-pointer-elim"); 4186360784Sdim } 4187360784Sdim if (B.contains("no-frame-pointer-elim-non-leaf")) { 4188360784Sdim // The value is ignored. "no-frame-pointer-elim"="true" takes priority. 4189360784Sdim if (FramePointer != "all") 4190360784Sdim FramePointer = "non-leaf"; 4191360784Sdim B.removeAttribute("no-frame-pointer-elim-non-leaf"); 4192360784Sdim } 4193360784Sdim 4194360784Sdim if (!FramePointer.empty()) 4195360784Sdim B.addAttribute("frame-pointer", FramePointer); 4196360784Sdim} 4197