1170 // Add all the dag nodes to the worklist. 1171 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 1172 E = DAG.allnodes_end(); I != E; ++I) 1173 AddToWorklist(I); 1174 1175 // Create a dummy node (which is not added to allnodes), that adds a reference 1176 // to the root node, preventing it from being deleted, and tracking any 1177 // changes of the root. 1178 HandleSDNode Dummy(DAG.getRoot()); 1179 1180 // while the worklist isn't empty, find a node and 1181 // try and combine it. 1182 while (!WorklistMap.empty()) { 1183 SDNode *N; 1184 // The Worklist holds the SDNodes in order, but it may contain null entries. 1185 do { 1186 N = Worklist.pop_back_val(); 1187 } while (!N); 1188 1189 bool GoodWorklistEntry = WorklistMap.erase(N); 1190 (void)GoodWorklistEntry; 1191 assert(GoodWorklistEntry && 1192 "Found a worklist entry without a corresponding map entry!"); 1193 1194 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1195 // N is deleted from the DAG, since they too may now be dead or may have a 1196 // reduced number of uses, allowing other xforms. 1197 if (recursivelyDeleteUnusedNodes(N)) 1198 continue; 1199 1200 WorklistRemover DeadNodes(*this); 1201 1202 // If this combine is running after legalizing the DAG, re-legalize any 1203 // nodes pulled off the worklist. 1204 if (Level == AfterLegalizeDAG) { 1205 SmallSetVector<SDNode *, 16> UpdatedNodes; 1206 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1207 1208 for (SDNode *LN : UpdatedNodes) { 1209 AddToWorklist(LN); 1210 AddUsersToWorklist(LN); 1211 } 1212 if (!NIsValid) 1213 continue; 1214 } 1215 1216 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1217 1218 // Add any operands of the new node which have not yet been combined to the 1219 // worklist as well. Because the worklist uniques things already, this 1220 // won't repeatedly process the same operand. 
1221 CombinedNodes.insert(N); 1222 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1223 if (!CombinedNodes.count(N->getOperand(i).getNode())) 1224 AddToWorklist(N->getOperand(i).getNode()); 1225 1226 SDValue RV = combine(N); 1227 1228 if (!RV.getNode()) 1229 continue; 1230 1231 ++NodesCombined; 1232 1233 // If we get back the same node we passed in, rather than a new node or 1234 // zero, we know that the node must have defined multiple values and 1235 // CombineTo was used. Since CombineTo takes care of the worklist 1236 // mechanics for us, we have no work to do in this case. 1237 if (RV.getNode() == N) 1238 continue; 1239 1240 assert(N->getOpcode() != ISD::DELETED_NODE && 1241 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1242 "Node was deleted but visit returned new node!"); 1243 1244 DEBUG(dbgs() << " ... into: "; 1245 RV.getNode()->dump(&DAG)); 1246 1247 // Transfer debug value. 1248 DAG.TransferDbgValues(SDValue(N, 0), RV); 1249 if (N->getNumValues() == RV.getNode()->getNumValues()) 1250 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1251 else { 1252 assert(N->getValueType(0) == RV.getValueType() && 1253 N->getNumValues() == 1 && "Type mismatch"); 1254 SDValue OpV = RV; 1255 DAG.ReplaceAllUsesWith(N, &OpV); 1256 } 1257 1258 // Push the new node and any users onto the worklist 1259 AddToWorklist(RV.getNode()); 1260 AddUsersToWorklist(RV.getNode()); 1261 1262 // Finally, if the node is now dead, remove it from the graph. The node 1263 // may not be dead if the replacement process recursively simplified to 1264 // something else needing this node. This will also take care of adding any 1265 // operands which have lost a user to the worklist. 1266 recursivelyDeleteUnusedNodes(N); 1267 } 1268 1269 // If the root changed (e.g. it was a dead load, update the root). 
  // Commit the root captured by the dummy handle (it may have been replaced
  // while combining) and delete any nodes that became dead.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// Dispatch to the opcode-specific combine routine for N.
/// Returns the replacement value, or a null SDValue when there is no
/// handler for this opcode (the default case).
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  // Both rotate directions share one handler.
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  }
  return SDValue();
}

/// Try every way this combiner knows to simplify N, in order:
///  1. the opcode-specific visit routine,
///  2. the target's PerformDAGCombine hook,
///  3. integer-operation promotion,
///  4. CSE against an already-existing commuted form of the node.
/// Returns the replacement value, or a null SDValue if nothing applied.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // Target-specific opcodes (>= BUILTIN_OP_END) always go to the target;
    // generic opcodes only if the target registered a combine for them.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad rewrites N in place; signal success by returning N.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode;
      // Nodes carrying arithmetic flags must be CSE'd with matching flags.
      if (const BinaryWithFlagsSDNode *BinNode =
              dyn_cast<BinaryWithFlagsSDNode>(N)) {
        CSENode = DAG.getNodeIfExists(
            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
            BinNode->hasNoSignedWrap(), BinNode->isExact());
      } else {
        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
      }
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.  Checks the first and last operands before scanning the middle,
/// since chains conventionally sit at one end of the operand list.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// Simplify a TokenFactor: drop redundant chains, inline single-use nested
/// TokenFactors, and deduplicate operands.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows when new token factors
  // are encountered.
  // Worklist scan: TFs may grow while iterating, so index by position.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    // NOTE: the inner loop deliberately reuses the name 'i', shadowing the
    // outer index for the remainder of this scope.
    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
      SDValue Op = TF->getOperand(i);

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru: a multi-use or already-seen TokenFactor is kept as a
        // plain operand.

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  // Result is null here when nothing changed.
  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Forward each result value directly to the corresponding operand.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// Constant-fold and algebraically simplify an integer ADD.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                  GA->getOffset() +
                                  (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  // (N0C here intentionally shadows the outer N0C: it is the constant inside
  // the SUB, not the whole LHS.)
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
  if (RADD.getNode())
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // SimplifyDemandedBits rewrites N in place on success; return N itself to
  // signal the change.
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    DAG.computeKnownBits(N0, LHSZero, LHSOne);

    if (LHSZero.getBoolValue()) {
      DAG.computeKnownBits(N1, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
        if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
          return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
      }
    }
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  // Same fold with the shl on the LHS.
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is all-ones or all-zeros.
    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Simplify ADDC (add producing a carry flag as result 1).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

/// Simplify ADDE (add with carry-in operand).
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.  Returns a zero constant, or a null SDValue
// when a vector BUILD_VECTOR of zero would be illegal.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  // Scalar zero is always fine.
  if (!VT.isVector())
    return DAG.getConstant(0, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, VT);
  return SDValue();
}

/// Constant-fold and algebraically simplify an integer SUB.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  // Constant on the RHS of an (add ?, c) RHS, used by the C2-(A+C1) fold.
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   VT);
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                    (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Simplify SUBC (sub producing a borrow flag as result 1).
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, VT),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (N0C && N0C->isAllOnesValue())
    return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  return SDValue();
}

/// Simplify SUBE (sub with borrow-in operand).
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Constant-fold and algebraically simplify an integer MUL, including
/// strength reduction of multiplies by (negated) powers of two to shifts.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // For vectors, a constant means a splat of one constant value.
    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
    ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
                            : APInt();
    N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
    ConstValue1 = N1IsConst ?
                  (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
                            : APInt();
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());

  // canonicalize constant to RHS
  if (N0IsConst && !N1IsConst)
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT), N0);
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT),
                       DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                            DAG.getConstant(Log2Val,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr,0), Y(nullptr,0);
    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
         isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1))))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
  if (RMUL.getNode())
    return RMUL;

  return SDValue();
}

/// Simplify SDIV: constant folding, strength reduction to UDIV or shifts,
/// and target/library expansion of divides by constants.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getAPIntValue() == 1LL)
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                         N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
                                     (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2SDivCheap())
      return SDValue();

    // Target-specific implementation of sdiv x, pow2.
    SDValue Res = BuildSDIVPow2(N);
    if (Res.getNode())
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0; biases negative dividends so the final
    // arithmetic shift rounds toward zero as sdiv requires.
    SDValue SRL =
        DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Simplify UDIV: constant folding and strength reduction of divides by
/// powers of two to logical right shifts.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (udiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
getShiftAmountTy(N0.getValueType()))); 2169 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 2170 if (N1.getOpcode() == ISD::SHL) { 2171 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 2172 if (SHC->getAPIntValue().isPowerOf2()) { 2173 EVT ADDVT = N1.getOperand(1).getValueType(); 2174 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, 2175 N1.getOperand(1), 2176 DAG.getConstant(SHC->getAPIntValue() 2177 .logBase2(), 2178 ADDVT)); 2179 AddToWorklist(Add.getNode()); 2180 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); 2181 } 2182 } 2183 } 2184 // fold (udiv x, c) -> alternate 2185 if (N1C && !TLI.isIntDivCheap()) { 2186 SDValue Op = BuildUDIV(N); 2187 if (Op.getNode()) return Op; 2188 } 2189 2190 // undef / X -> 0 2191 if (N0.getOpcode() == ISD::UNDEF) 2192 return DAG.getConstant(0, VT); 2193 // X / undef -> undef 2194 if (N1.getOpcode() == ISD::UNDEF) 2195 return N1; 2196 2197 return SDValue(); 2198} 2199 2200SDValue DAGCombiner::visitSREM(SDNode *N) { 2201 SDValue N0 = N->getOperand(0); 2202 SDValue N1 = N->getOperand(1); 2203 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2204 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2205 EVT VT = N->getValueType(0); 2206 2207 // fold (srem c1, c2) -> c1%c2 2208 if (N0C && N1C && !N1C->isNullValue()) 2209 return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); 2210 // If we know the sign bits of both operands are zero, strength reduce to a 2211 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 2212 if (!VT.isVector()) { 2213 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2214 return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); 2215 } 2216 2217 // If X/C can be simplified by the division-by-constant logic, lower 2218 // X%C to the equivalent of X-X/C*C. 
2219 if (N1C && !N1C->isNullValue()) { 2220 SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); 2221 AddToWorklist(Div.getNode()); 2222 SDValue OptimizedDiv = combine(Div.getNode()); 2223 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2224 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2225 OptimizedDiv, N1); 2226 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 2227 AddToWorklist(Mul.getNode()); 2228 return Sub; 2229 } 2230 } 2231 2232 // undef % X -> 0 2233 if (N0.getOpcode() == ISD::UNDEF) 2234 return DAG.getConstant(0, VT); 2235 // X % undef -> undef 2236 if (N1.getOpcode() == ISD::UNDEF) 2237 return N1; 2238 2239 return SDValue(); 2240} 2241 2242SDValue DAGCombiner::visitUREM(SDNode *N) { 2243 SDValue N0 = N->getOperand(0); 2244 SDValue N1 = N->getOperand(1); 2245 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2246 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2247 EVT VT = N->getValueType(0); 2248 2249 // fold (urem c1, c2) -> c1%c2 2250 if (N0C && N1C && !N1C->isNullValue()) 2251 return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); 2252 // fold (urem x, pow2) -> (and x, pow2-1) 2253 if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) 2254 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, 2255 DAG.getConstant(N1C->getAPIntValue()-1,VT)); 2256 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) 2257 if (N1.getOpcode() == ISD::SHL) { 2258 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 2259 if (SHC->getAPIntValue().isPowerOf2()) { 2260 SDValue Add = 2261 DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, 2262 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), 2263 VT)); 2264 AddToWorklist(Add.getNode()); 2265 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); 2266 } 2267 } 2268 } 2269 2270 // If X/C can be simplified by the division-by-constant logic, lower 2271 // X%C to the equivalent of X-X/C*C. 
2272 if (N1C && !N1C->isNullValue()) { 2273 SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); 2274 AddToWorklist(Div.getNode()); 2275 SDValue OptimizedDiv = combine(Div.getNode()); 2276 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2277 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2278 OptimizedDiv, N1); 2279 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 2280 AddToWorklist(Mul.getNode()); 2281 return Sub; 2282 } 2283 } 2284 2285 // undef % X -> 0 2286 if (N0.getOpcode() == ISD::UNDEF) 2287 return DAG.getConstant(0, VT); 2288 // X % undef -> undef 2289 if (N1.getOpcode() == ISD::UNDEF) 2290 return N1; 2291 2292 return SDValue(); 2293} 2294 2295SDValue DAGCombiner::visitMULHS(SDNode *N) { 2296 SDValue N0 = N->getOperand(0); 2297 SDValue N1 = N->getOperand(1); 2298 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2299 EVT VT = N->getValueType(0); 2300 SDLoc DL(N); 2301 2302 // fold (mulhs x, 0) -> 0 2303 if (N1C && N1C->isNullValue()) 2304 return N1; 2305 // fold (mulhs x, 1) -> (sra x, size(x)-1) 2306 if (N1C && N1C->getAPIntValue() == 1) 2307 return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, 2308 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 2309 getShiftAmountTy(N0.getValueType()))); 2310 // fold (mulhs x, undef) -> 0 2311 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2312 return DAG.getConstant(0, VT); 2313 2314 // If the type twice as wide is legal, transform the mulhs to a wider multiply 2315 // plus a shift. 
2316 if (VT.isSimple() && !VT.isVector()) { 2317 MVT Simple = VT.getSimpleVT(); 2318 unsigned SimpleSize = Simple.getSizeInBits(); 2319 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2320 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2321 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 2322 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 2323 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2324 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2325 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2326 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2327 } 2328 } 2329 2330 return SDValue(); 2331} 2332 2333SDValue DAGCombiner::visitMULHU(SDNode *N) { 2334 SDValue N0 = N->getOperand(0); 2335 SDValue N1 = N->getOperand(1); 2336 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2337 EVT VT = N->getValueType(0); 2338 SDLoc DL(N); 2339 2340 // fold (mulhu x, 0) -> 0 2341 if (N1C && N1C->isNullValue()) 2342 return N1; 2343 // fold (mulhu x, 1) -> 0 2344 if (N1C && N1C->getAPIntValue() == 1) 2345 return DAG.getConstant(0, N0.getValueType()); 2346 // fold (mulhu x, undef) -> 0 2347 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2348 return DAG.getConstant(0, VT); 2349 2350 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2351 // plus a shift. 
2352 if (VT.isSimple() && !VT.isVector()) { 2353 MVT Simple = VT.getSimpleVT(); 2354 unsigned SimpleSize = Simple.getSizeInBits(); 2355 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2356 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2357 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2358 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2359 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2360 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2361 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2362 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2363 } 2364 } 2365 2366 return SDValue(); 2367} 2368 2369/// Perform optimizations common to nodes that compute two values. LoOp and HiOp 2370/// give the opcodes for the two computations that are being performed. Return 2371/// true if a simplification was made. 2372SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2373 unsigned HiOp) { 2374 // If the high half is not needed, just compute the low half. 2375 bool HiExists = N->hasAnyUseOfValue(1); 2376 if (!HiExists && 2377 (!LegalOperations || 2378 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 2379 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2380 return CombineTo(N, Res, Res); 2381 } 2382 2383 // If the low half is not needed, just compute the high half. 2384 bool LoExists = N->hasAnyUseOfValue(0); 2385 if (!LoExists && 2386 (!LegalOperations || 2387 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2388 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2389 return CombineTo(N, Res, Res); 2390 } 2391 2392 // If both halves are used, return as it is. 2393 if (LoExists && HiExists) 2394 return SDValue(); 2395 2396 // If the two computed results can be simplified separately, separate them. 
2397 if (LoExists) { 2398 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2399 AddToWorklist(Lo.getNode()); 2400 SDValue LoOpt = combine(Lo.getNode()); 2401 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2402 (!LegalOperations || 2403 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2404 return CombineTo(N, LoOpt, LoOpt); 2405 } 2406 2407 if (HiExists) { 2408 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2409 AddToWorklist(Hi.getNode()); 2410 SDValue HiOpt = combine(Hi.getNode()); 2411 if (HiOpt.getNode() && HiOpt != Hi && 2412 (!LegalOperations || 2413 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2414 return CombineTo(N, HiOpt, HiOpt); 2415 } 2416 2417 return SDValue(); 2418} 2419 2420SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2421 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2422 if (Res.getNode()) return Res; 2423 2424 EVT VT = N->getValueType(0); 2425 SDLoc DL(N); 2426 2427 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2428 // plus a shift. 2429 if (VT.isSimple() && !VT.isVector()) { 2430 MVT Simple = VT.getSimpleVT(); 2431 unsigned SimpleSize = Simple.getSizeInBits(); 2432 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2433 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2434 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2435 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2436 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2437 // Compute the high part as N1. 2438 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2439 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2440 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2441 // Compute the low part as N0. 
2442 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2443 return CombineTo(N, Lo, Hi); 2444 } 2445 } 2446 2447 return SDValue(); 2448} 2449 2450SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2451 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 2452 if (Res.getNode()) return Res; 2453 2454 EVT VT = N->getValueType(0); 2455 SDLoc DL(N); 2456 2457 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2458 // plus a shift. 2459 if (VT.isSimple() && !VT.isVector()) { 2460 MVT Simple = VT.getSimpleVT(); 2461 unsigned SimpleSize = Simple.getSizeInBits(); 2462 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2463 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2464 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2465 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2466 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2467 // Compute the high part as N1. 2468 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2469 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2470 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2471 // Compute the low part as N0. 
2472 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2473 return CombineTo(N, Lo, Hi); 2474 } 2475 } 2476 2477 return SDValue(); 2478} 2479 2480SDValue DAGCombiner::visitSMULO(SDNode *N) { 2481 // (smulo x, 2) -> (saddo x, x) 2482 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2483 if (C2->getAPIntValue() == 2) 2484 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2485 N->getOperand(0), N->getOperand(0)); 2486 2487 return SDValue(); 2488} 2489 2490SDValue DAGCombiner::visitUMULO(SDNode *N) { 2491 // (umulo x, 2) -> (uaddo x, x) 2492 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2493 if (C2->getAPIntValue() == 2) 2494 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2495 N->getOperand(0), N->getOperand(0)); 2496 2497 return SDValue(); 2498} 2499 2500SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 2501 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 2502 if (Res.getNode()) return Res; 2503 2504 return SDValue(); 2505} 2506 2507SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 2508 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 2509 if (Res.getNode()) return Res; 2510 2511 return SDValue(); 2512} 2513 2514/// If this is a binary operator with two operands of the same opcode, try to 2515/// simplify it. 2516SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2517 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2518 EVT VT = N0.getValueType(); 2519 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2520 2521 // Bail early if none of these transforms apply. 
2522 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2523 2524 // For each of OP in AND/OR/XOR: 2525 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2526 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2527 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2528 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2529 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2530 // 2531 // do not sink logical op inside of a vector extend, since it may combine 2532 // into a vsetcc. 2533 EVT Op0VT = N0.getOperand(0).getValueType(); 2534 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2535 N0.getOpcode() == ISD::SIGN_EXTEND || 2536 N0.getOpcode() == ISD::BSWAP || 2537 // Avoid infinite looping with PromoteIntBinOp. 2538 (N0.getOpcode() == ISD::ANY_EXTEND && 2539 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2540 (N0.getOpcode() == ISD::TRUNCATE && 2541 (!TLI.isZExtFree(VT, Op0VT) || 2542 !TLI.isTruncateFree(Op0VT, VT)) && 2543 TLI.isTypeLegal(Op0VT))) && 2544 !VT.isVector() && 2545 Op0VT == N1.getOperand(0).getValueType() && 2546 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2547 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2548 N0.getOperand(0).getValueType(), 2549 N0.getOperand(0), N1.getOperand(0)); 2550 AddToWorklist(ORNode.getNode()); 2551 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2552 } 2553 2554 // For each of OP in SHL/SRL/SRA/AND... 
2555 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2556 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2557 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2558 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2559 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2560 N0.getOperand(1) == N1.getOperand(1)) { 2561 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2562 N0.getOperand(0).getValueType(), 2563 N0.getOperand(0), N1.getOperand(0)); 2564 AddToWorklist(ORNode.getNode()); 2565 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2566 ORNode, N0.getOperand(1)); 2567 } 2568 2569 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2570 // Only perform this optimization after type legalization and before 2571 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2572 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2573 // we don't want to undo this promotion. 2574 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2575 // on scalars. 2576 if ((N0.getOpcode() == ISD::BITCAST || 2577 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2578 Level == AfterLegalizeTypes) { 2579 SDValue In0 = N0.getOperand(0); 2580 SDValue In1 = N1.getOperand(0); 2581 EVT In0Ty = In0.getValueType(); 2582 EVT In1Ty = In1.getValueType(); 2583 SDLoc DL(N); 2584 // If both incoming values are integers, and the original types are the 2585 // same. 2586 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2587 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2588 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2589 AddToWorklist(Op.getNode()); 2590 return BC; 2591 } 2592 } 2593 2594 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 
2595 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2596 // If both shuffles use the same mask, and both shuffle within a single 2597 // vector, then it is worthwhile to move the swizzle after the operation. 2598 // The type-legalizer generates this pattern when loading illegal 2599 // vector types from memory. In many cases this allows additional shuffle 2600 // optimizations. 2601 // There are other cases where moving the shuffle after the xor/and/or 2602 // is profitable even if shuffles don't perform a swizzle. 2603 // If both shuffles use the same mask, and both shuffles have the same first 2604 // or second operand, then it might still be profitable to move the shuffle 2605 // after the xor/and/or operation. 2606 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2607 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2608 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2609 2610 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2611 "Inputs to shuffles are not the same type"); 2612 2613 // Check that both shuffles use the same mask. The masks are known to be of 2614 // the same length because the result vector type is the same. 2615 // Check also that shuffles have only one use to avoid introducing extra 2616 // instructions. 2617 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2618 SVN0->getMask().equals(SVN1->getMask())) { 2619 SDValue ShOp = N0->getOperand(1); 2620 2621 // Don't try to fold this node if it requires introducing a 2622 // build vector of all zeros that might be illegal at this stage. 
2623 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2624 if (!LegalTypes) 2625 ShOp = DAG.getConstant(0, VT); 2626 else 2627 ShOp = SDValue(); 2628 } 2629 2630 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2631 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2632 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2633 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2634 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2635 N0->getOperand(0), N1->getOperand(0)); 2636 AddToWorklist(NewNode.getNode()); 2637 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2638 &SVN0->getMask()[0]); 2639 } 2640 2641 // Don't try to fold this node if it requires introducing a 2642 // build vector of all zeros that might be illegal at this stage. 2643 ShOp = N0->getOperand(0); 2644 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2645 if (!LegalTypes) 2646 ShOp = DAG.getConstant(0, VT); 2647 else 2648 ShOp = SDValue(); 2649 } 2650 2651 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2652 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2653 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2654 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2655 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2656 N0->getOperand(1), N1->getOperand(1)); 2657 AddToWorklist(NewNode.getNode()); 2658 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2659 &SVN0->getMask()[0]); 2660 } 2661 } 2662 } 2663 2664 return SDValue(); 2665} 2666 2667SDValue DAGCombiner::visitAND(SDNode *N) { 2668 SDValue N0 = N->getOperand(0); 2669 SDValue N1 = N->getOperand(1); 2670 SDValue LL, LR, RL, RR, CC0, CC1; 2671 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2672 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2673 EVT VT = N1.getValueType(); 2674 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 2675 2676 // fold vector ops 2677 if 
(VT.isVector()) { 2678 SDValue FoldedVOp = SimplifyVBinOp(N); 2679 if (FoldedVOp.getNode()) return FoldedVOp; 2680 2681 // fold (and x, 0) -> 0, vector edition 2682 if (ISD::isBuildVectorAllZeros(N0.getNode())) 2683 // do not return N0, because undef node may exist in N0 2684 return DAG.getConstant( 2685 APInt::getNullValue( 2686 N0.getValueType().getScalarType().getSizeInBits()), 2687 N0.getValueType()); 2688 if (ISD::isBuildVectorAllZeros(N1.getNode())) 2689 // do not return N1, because undef node may exist in N1 2690 return DAG.getConstant( 2691 APInt::getNullValue( 2692 N1.getValueType().getScalarType().getSizeInBits()), 2693 N1.getValueType()); 2694 2695 // fold (and x, -1) -> x, vector edition 2696 if (ISD::isBuildVectorAllOnes(N0.getNode())) 2697 return N1; 2698 if (ISD::isBuildVectorAllOnes(N1.getNode())) 2699 return N0; 2700 } 2701 2702 // fold (and x, undef) -> 0 2703 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2704 return DAG.getConstant(0, VT); 2705 // fold (and c1, c2) -> c1&c2 2706 if (N0C && N1C) 2707 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 2708 // canonicalize constant to RHS 2709 if (N0C && !N1C) 2710 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 2711 // fold (and x, -1) -> x 2712 if (N1C && N1C->isAllOnesValue()) 2713 return N0; 2714 // if (and x, c) is known to be zero, return 0 2715 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2716 APInt::getAllOnesValue(BitWidth))) 2717 return DAG.getConstant(0, VT); 2718 // reassociate and 2719 SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); 2720 if (RAND.getNode()) 2721 return RAND; 2722 // fold (and (or x, C), D) -> D if (C & D) == D 2723 if (N1C && N0.getOpcode() == ISD::OR) 2724 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2725 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2726 return N1; 2727 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 
2728 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2729 SDValue N0Op0 = N0.getOperand(0); 2730 APInt Mask = ~N1C->getAPIntValue(); 2731 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2732 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2733 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 2734 N0.getValueType(), N0Op0); 2735 2736 // Replace uses of the AND with uses of the Zero extend node. 2737 CombineTo(N, Zext); 2738 2739 // We actually want to replace all uses of the any_extend with the 2740 // zero_extend, to avoid duplicating things. This will later cause this 2741 // AND to be folded. 2742 CombineTo(N0.getNode(), Zext); 2743 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2744 } 2745 } 2746 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2747 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2748 // already be zero by virtue of the width of the base type of the load. 2749 // 2750 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2751 // more cases. 2752 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2753 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2754 N0.getOpcode() == ISD::LOAD) { 2755 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2756 N0 : N0.getOperand(0) ); 2757 2758 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2759 // This can be a pure constant or a vector splat, in which case we treat the 2760 // vector as a scalar and use the splat value. 
2761 APInt Constant = APInt::getNullValue(1); 2762 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2763 Constant = C->getAPIntValue(); 2764 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2765 APInt SplatValue, SplatUndef; 2766 unsigned SplatBitSize; 2767 bool HasAnyUndefs; 2768 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2769 SplatBitSize, HasAnyUndefs); 2770 if (IsSplat) { 2771 // Undef bits can contribute to a possible optimisation if set, so 2772 // set them. 2773 SplatValue |= SplatUndef; 2774 2775 // The splat value may be something like "0x00FFFFFF", which means 0 for 2776 // the first vector value and FF for the rest, repeating. We need a mask 2777 // that will apply equally to all members of the vector, so AND all the 2778 // lanes of the constant together. 2779 EVT VT = Vector->getValueType(0); 2780 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2781 2782 // If the splat value has been compressed to a bitlength lower 2783 // than the size of the vector lane, we need to re-expand it to 2784 // the lane size. 2785 if (BitWidth > SplatBitSize) 2786 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 2787 SplatBitSize < BitWidth; 2788 SplatBitSize = SplatBitSize * 2) 2789 SplatValue |= SplatValue.shl(SplatBitSize); 2790
| 1163 // Add all the dag nodes to the worklist. 1164 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 1165 E = DAG.allnodes_end(); I != E; ++I) 1166 AddToWorklist(I); 1167 1168 // Create a dummy node (which is not added to allnodes), that adds a reference 1169 // to the root node, preventing it from being deleted, and tracking any 1170 // changes of the root. 1171 HandleSDNode Dummy(DAG.getRoot()); 1172 1173 // while the worklist isn't empty, find a node and 1174 // try and combine it. 1175 while (!WorklistMap.empty()) { 1176 SDNode *N; 1177 // The Worklist holds the SDNodes in order, but it may contain null entries. 1178 do { 1179 N = Worklist.pop_back_val(); 1180 } while (!N); 1181 1182 bool GoodWorklistEntry = WorklistMap.erase(N); 1183 (void)GoodWorklistEntry; 1184 assert(GoodWorklistEntry && 1185 "Found a worklist entry without a corresponding map entry!"); 1186 1187 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1188 // N is deleted from the DAG, since they too may now be dead or may have a 1189 // reduced number of uses, allowing other xforms. 1190 if (recursivelyDeleteUnusedNodes(N)) 1191 continue; 1192 1193 WorklistRemover DeadNodes(*this); 1194 1195 // If this combine is running after legalizing the DAG, re-legalize any 1196 // nodes pulled off the worklist. 1197 if (Level == AfterLegalizeDAG) { 1198 SmallSetVector<SDNode *, 16> UpdatedNodes; 1199 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1200 1201 for (SDNode *LN : UpdatedNodes) { 1202 AddToWorklist(LN); 1203 AddUsersToWorklist(LN); 1204 } 1205 if (!NIsValid) 1206 continue; 1207 } 1208 1209 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1210 1211 // Add any operands of the new node which have not yet been combined to the 1212 // worklist as well. Because the worklist uniques things already, this 1213 // won't repeatedly process the same operand. 
1214 CombinedNodes.insert(N); 1215 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1216 if (!CombinedNodes.count(N->getOperand(i).getNode())) 1217 AddToWorklist(N->getOperand(i).getNode()); 1218 1219 SDValue RV = combine(N); 1220 1221 if (!RV.getNode()) 1222 continue; 1223 1224 ++NodesCombined; 1225 1226 // If we get back the same node we passed in, rather than a new node or 1227 // zero, we know that the node must have defined multiple values and 1228 // CombineTo was used. Since CombineTo takes care of the worklist 1229 // mechanics for us, we have no work to do in this case. 1230 if (RV.getNode() == N) 1231 continue; 1232 1233 assert(N->getOpcode() != ISD::DELETED_NODE && 1234 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1235 "Node was deleted but visit returned new node!"); 1236 1237 DEBUG(dbgs() << " ... into: "; 1238 RV.getNode()->dump(&DAG)); 1239 1240 // Transfer debug value. 1241 DAG.TransferDbgValues(SDValue(N, 0), RV); 1242 if (N->getNumValues() == RV.getNode()->getNumValues()) 1243 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1244 else { 1245 assert(N->getValueType(0) == RV.getValueType() && 1246 N->getNumValues() == 1 && "Type mismatch"); 1247 SDValue OpV = RV; 1248 DAG.ReplaceAllUsesWith(N, &OpV); 1249 } 1250 1251 // Push the new node and any users onto the worklist 1252 AddToWorklist(RV.getNode()); 1253 AddUsersToWorklist(RV.getNode()); 1254 1255 // Finally, if the node is now dead, remove it from the graph. The node 1256 // may not be dead if the replacement process recursively simplified to 1257 // something else needing this node. This will also take care of adding any 1258 // operands which have lost a user to the worklist. 1259 recursivelyDeleteUnusedNodes(N); 1260 } 1261 1262 // If the root changed (e.g. it was a dead load, update the root). 
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// Dispatch to the opcode-specific visit routine for N. Returns the
/// replacement value, or a null SDValue if no combine applied.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  }
  return SDValue();
}

/// Combine N: first run the generic per-opcode visit, then give the target a
/// chance via its DAG-combine hook, then try integer promotion, and finally
/// try CSE'ing against a commuted form of the node.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode;
      if (const BinaryWithFlagsSDNode *BinNode =
              dyn_cast<BinaryWithFlagsSDNode>(N)) {
        // Preserve the arithmetic flags when looking up the commuted node.
        CSENode = DAG.getNodeIfExists(
            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
            BinNode->hasNoSignedWrap(), BinNode->isExact());
      } else {
        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
      }
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    // Chains are usually operand 0 or the last operand; check those first
    // before scanning the middle operands.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// Simplify a TokenFactor: flatten nested single-use token factors and drop
/// entry-token and duplicate operands.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows when new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
      SDValue Op = TF->getOperand(i);

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// Combine an integer ADD.  The folds are attempted in order, so earlier
/// (cheaper/more canonical) folds take precedence.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                  GA->getOffset() +
                                  (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
  if (RADD.getNode())
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    DAG.computeKnownBits(N0, LHSZero, LHSOne);

    if (LHSZero.getBoolValue()) {
      DAG.computeKnownBits(N1, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
        if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
          return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
      }
    }
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  // ... and the symmetric form with the shl on the LHS.
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    ConstantSDNode *AndOp1 =
      dyn_cast<ConstantSDNode>(N1->getOperand(1));
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Combine an ADDC (add producing a carry flag result).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

/// Combine an ADDE (add with carry-in and carry-out).
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  if (!VT.isVector())
    return DAG.getConstant(0, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, VT);
  // BUILD_VECTOR is not legal for this type after legalization; give up.
  return SDValue();
}

/// Combine an integer SUB.  Folds are attempted in order.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  // N1C1 is the constant C1 when N1 has the shape (add A, C1).
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   VT);
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                    (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Combine a SUBC (subtract producing a borrow flag result).
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, VT),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (N0C && N0C->isAllOnesValue())
    return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  return SDValue();
}

/// Combine a SUBE (subtract with borrow-in and borrow-out).
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Combine an integer MUL.  Handles both scalar constants and constant
/// splat vectors via isConstantSplatVector.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
    ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
                            : APInt();
    N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
    ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
                            : APInt();
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());

  // canonicalize constant to RHS
  if (N0IsConst && !N1IsConst)
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT), N0);
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT),
                       DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
                            DAG.getConstant(Log2Val,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr,0), Y(nullptr,0);
    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
         isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1))))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
  if (RMUL.getNode())
    return RMUL;

  return SDValue();
}

/// Combine a signed integer divide.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getAPIntValue() == 1LL)
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                         N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
                                     (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2SDivCheap())
      return SDValue();

    // Target-specific implementation of sdiv x, pow2.
    SDValue Res = BuildSDIVPow2(N);
    if (Res.getNode())
      return Res;

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL =
        DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Combine an unsigned integer divide.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (udiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
      }
    }
  }
  // fold (udiv x, c) -> alternate
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Combine a signed integer remainder.
SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold (srem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Combine an unsigned integer remainder.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        SDValue Add =
          DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                 VT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
2265 if (N1C && !N1C->isNullValue()) { 2266 SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); 2267 AddToWorklist(Div.getNode()); 2268 SDValue OptimizedDiv = combine(Div.getNode()); 2269 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2270 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2271 OptimizedDiv, N1); 2272 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 2273 AddToWorklist(Mul.getNode()); 2274 return Sub; 2275 } 2276 } 2277 2278 // undef % X -> 0 2279 if (N0.getOpcode() == ISD::UNDEF) 2280 return DAG.getConstant(0, VT); 2281 // X % undef -> undef 2282 if (N1.getOpcode() == ISD::UNDEF) 2283 return N1; 2284 2285 return SDValue(); 2286} 2287 2288SDValue DAGCombiner::visitMULHS(SDNode *N) { 2289 SDValue N0 = N->getOperand(0); 2290 SDValue N1 = N->getOperand(1); 2291 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2292 EVT VT = N->getValueType(0); 2293 SDLoc DL(N); 2294 2295 // fold (mulhs x, 0) -> 0 2296 if (N1C && N1C->isNullValue()) 2297 return N1; 2298 // fold (mulhs x, 1) -> (sra x, size(x)-1) 2299 if (N1C && N1C->getAPIntValue() == 1) 2300 return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, 2301 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 2302 getShiftAmountTy(N0.getValueType()))); 2303 // fold (mulhs x, undef) -> 0 2304 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2305 return DAG.getConstant(0, VT); 2306 2307 // If the type twice as wide is legal, transform the mulhs to a wider multiply 2308 // plus a shift. 
2309 if (VT.isSimple() && !VT.isVector()) { 2310 MVT Simple = VT.getSimpleVT(); 2311 unsigned SimpleSize = Simple.getSizeInBits(); 2312 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2313 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2314 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 2315 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 2316 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2317 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2318 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2319 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2320 } 2321 } 2322 2323 return SDValue(); 2324} 2325 2326SDValue DAGCombiner::visitMULHU(SDNode *N) { 2327 SDValue N0 = N->getOperand(0); 2328 SDValue N1 = N->getOperand(1); 2329 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2330 EVT VT = N->getValueType(0); 2331 SDLoc DL(N); 2332 2333 // fold (mulhu x, 0) -> 0 2334 if (N1C && N1C->isNullValue()) 2335 return N1; 2336 // fold (mulhu x, 1) -> 0 2337 if (N1C && N1C->getAPIntValue() == 1) 2338 return DAG.getConstant(0, N0.getValueType()); 2339 // fold (mulhu x, undef) -> 0 2340 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2341 return DAG.getConstant(0, VT); 2342 2343 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2344 // plus a shift. 
2345 if (VT.isSimple() && !VT.isVector()) { 2346 MVT Simple = VT.getSimpleVT(); 2347 unsigned SimpleSize = Simple.getSizeInBits(); 2348 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2349 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2350 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2351 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2352 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2353 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2354 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2355 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2356 } 2357 } 2358 2359 return SDValue(); 2360} 2361 2362/// Perform optimizations common to nodes that compute two values. LoOp and HiOp 2363/// give the opcodes for the two computations that are being performed. Return 2364/// true if a simplification was made. 2365SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2366 unsigned HiOp) { 2367 // If the high half is not needed, just compute the low half. 2368 bool HiExists = N->hasAnyUseOfValue(1); 2369 if (!HiExists && 2370 (!LegalOperations || 2371 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 2372 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2373 return CombineTo(N, Res, Res); 2374 } 2375 2376 // If the low half is not needed, just compute the high half. 2377 bool LoExists = N->hasAnyUseOfValue(0); 2378 if (!LoExists && 2379 (!LegalOperations || 2380 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2381 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2382 return CombineTo(N, Res, Res); 2383 } 2384 2385 // If both halves are used, return as it is. 2386 if (LoExists && HiExists) 2387 return SDValue(); 2388 2389 // If the two computed results can be simplified separately, separate them. 
2390 if (LoExists) { 2391 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2392 AddToWorklist(Lo.getNode()); 2393 SDValue LoOpt = combine(Lo.getNode()); 2394 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2395 (!LegalOperations || 2396 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2397 return CombineTo(N, LoOpt, LoOpt); 2398 } 2399 2400 if (HiExists) { 2401 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2402 AddToWorklist(Hi.getNode()); 2403 SDValue HiOpt = combine(Hi.getNode()); 2404 if (HiOpt.getNode() && HiOpt != Hi && 2405 (!LegalOperations || 2406 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2407 return CombineTo(N, HiOpt, HiOpt); 2408 } 2409 2410 return SDValue(); 2411} 2412 2413SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2414 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2415 if (Res.getNode()) return Res; 2416 2417 EVT VT = N->getValueType(0); 2418 SDLoc DL(N); 2419 2420 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2421 // plus a shift. 2422 if (VT.isSimple() && !VT.isVector()) { 2423 MVT Simple = VT.getSimpleVT(); 2424 unsigned SimpleSize = Simple.getSizeInBits(); 2425 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2426 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2427 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2428 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2429 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2430 // Compute the high part as N1. 2431 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2432 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2433 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2434 // Compute the low part as N0. 
2435 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2436 return CombineTo(N, Lo, Hi); 2437 } 2438 } 2439 2440 return SDValue(); 2441} 2442 2443SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2444 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 2445 if (Res.getNode()) return Res; 2446 2447 EVT VT = N->getValueType(0); 2448 SDLoc DL(N); 2449 2450 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2451 // plus a shift. 2452 if (VT.isSimple() && !VT.isVector()) { 2453 MVT Simple = VT.getSimpleVT(); 2454 unsigned SimpleSize = Simple.getSizeInBits(); 2455 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2456 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2457 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2458 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2459 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2460 // Compute the high part as N1. 2461 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2462 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2463 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2464 // Compute the low part as N0. 
2465 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2466 return CombineTo(N, Lo, Hi); 2467 } 2468 } 2469 2470 return SDValue(); 2471} 2472 2473SDValue DAGCombiner::visitSMULO(SDNode *N) { 2474 // (smulo x, 2) -> (saddo x, x) 2475 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2476 if (C2->getAPIntValue() == 2) 2477 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2478 N->getOperand(0), N->getOperand(0)); 2479 2480 return SDValue(); 2481} 2482 2483SDValue DAGCombiner::visitUMULO(SDNode *N) { 2484 // (umulo x, 2) -> (uaddo x, x) 2485 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2486 if (C2->getAPIntValue() == 2) 2487 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2488 N->getOperand(0), N->getOperand(0)); 2489 2490 return SDValue(); 2491} 2492 2493SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 2494 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 2495 if (Res.getNode()) return Res; 2496 2497 return SDValue(); 2498} 2499 2500SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 2501 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 2502 if (Res.getNode()) return Res; 2503 2504 return SDValue(); 2505} 2506 2507/// If this is a binary operator with two operands of the same opcode, try to 2508/// simplify it. 2509SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2510 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2511 EVT VT = N0.getValueType(); 2512 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2513 2514 // Bail early if none of these transforms apply. 
2515 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2516 2517 // For each of OP in AND/OR/XOR: 2518 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2519 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2520 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2521 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2522 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2523 // 2524 // do not sink logical op inside of a vector extend, since it may combine 2525 // into a vsetcc. 2526 EVT Op0VT = N0.getOperand(0).getValueType(); 2527 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2528 N0.getOpcode() == ISD::SIGN_EXTEND || 2529 N0.getOpcode() == ISD::BSWAP || 2530 // Avoid infinite looping with PromoteIntBinOp. 2531 (N0.getOpcode() == ISD::ANY_EXTEND && 2532 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2533 (N0.getOpcode() == ISD::TRUNCATE && 2534 (!TLI.isZExtFree(VT, Op0VT) || 2535 !TLI.isTruncateFree(Op0VT, VT)) && 2536 TLI.isTypeLegal(Op0VT))) && 2537 !VT.isVector() && 2538 Op0VT == N1.getOperand(0).getValueType() && 2539 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2540 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2541 N0.getOperand(0).getValueType(), 2542 N0.getOperand(0), N1.getOperand(0)); 2543 AddToWorklist(ORNode.getNode()); 2544 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2545 } 2546 2547 // For each of OP in SHL/SRL/SRA/AND... 
2548 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2549 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2550 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2551 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2552 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2553 N0.getOperand(1) == N1.getOperand(1)) { 2554 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2555 N0.getOperand(0).getValueType(), 2556 N0.getOperand(0), N1.getOperand(0)); 2557 AddToWorklist(ORNode.getNode()); 2558 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2559 ORNode, N0.getOperand(1)); 2560 } 2561 2562 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2563 // Only perform this optimization after type legalization and before 2564 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2565 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2566 // we don't want to undo this promotion. 2567 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2568 // on scalars. 2569 if ((N0.getOpcode() == ISD::BITCAST || 2570 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2571 Level == AfterLegalizeTypes) { 2572 SDValue In0 = N0.getOperand(0); 2573 SDValue In1 = N1.getOperand(0); 2574 EVT In0Ty = In0.getValueType(); 2575 EVT In1Ty = In1.getValueType(); 2576 SDLoc DL(N); 2577 // If both incoming values are integers, and the original types are the 2578 // same. 2579 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2580 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2581 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2582 AddToWorklist(Op.getNode()); 2583 return BC; 2584 } 2585 } 2586 2587 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 
2588 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2589 // If both shuffles use the same mask, and both shuffle within a single 2590 // vector, then it is worthwhile to move the swizzle after the operation. 2591 // The type-legalizer generates this pattern when loading illegal 2592 // vector types from memory. In many cases this allows additional shuffle 2593 // optimizations. 2594 // There are other cases where moving the shuffle after the xor/and/or 2595 // is profitable even if shuffles don't perform a swizzle. 2596 // If both shuffles use the same mask, and both shuffles have the same first 2597 // or second operand, then it might still be profitable to move the shuffle 2598 // after the xor/and/or operation. 2599 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2600 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2601 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2602 2603 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2604 "Inputs to shuffles are not the same type"); 2605 2606 // Check that both shuffles use the same mask. The masks are known to be of 2607 // the same length because the result vector type is the same. 2608 // Check also that shuffles have only one use to avoid introducing extra 2609 // instructions. 2610 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2611 SVN0->getMask().equals(SVN1->getMask())) { 2612 SDValue ShOp = N0->getOperand(1); 2613 2614 // Don't try to fold this node if it requires introducing a 2615 // build vector of all zeros that might be illegal at this stage. 
2616 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2617 if (!LegalTypes) 2618 ShOp = DAG.getConstant(0, VT); 2619 else 2620 ShOp = SDValue(); 2621 } 2622 2623 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2624 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2625 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2626 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2627 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2628 N0->getOperand(0), N1->getOperand(0)); 2629 AddToWorklist(NewNode.getNode()); 2630 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2631 &SVN0->getMask()[0]); 2632 } 2633 2634 // Don't try to fold this node if it requires introducing a 2635 // build vector of all zeros that might be illegal at this stage. 2636 ShOp = N0->getOperand(0); 2637 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2638 if (!LegalTypes) 2639 ShOp = DAG.getConstant(0, VT); 2640 else 2641 ShOp = SDValue(); 2642 } 2643 2644 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2645 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2646 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2647 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2648 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2649 N0->getOperand(1), N1->getOperand(1)); 2650 AddToWorklist(NewNode.getNode()); 2651 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2652 &SVN0->getMask()[0]); 2653 } 2654 } 2655 } 2656 2657 return SDValue(); 2658} 2659 2660SDValue DAGCombiner::visitAND(SDNode *N) { 2661 SDValue N0 = N->getOperand(0); 2662 SDValue N1 = N->getOperand(1); 2663 SDValue LL, LR, RL, RR, CC0, CC1; 2664 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2665 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2666 EVT VT = N1.getValueType(); 2667 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 2668 2669 // fold vector ops 2670 if 
(VT.isVector()) { 2671 SDValue FoldedVOp = SimplifyVBinOp(N); 2672 if (FoldedVOp.getNode()) return FoldedVOp; 2673 2674 // fold (and x, 0) -> 0, vector edition 2675 if (ISD::isBuildVectorAllZeros(N0.getNode())) 2676 // do not return N0, because undef node may exist in N0 2677 return DAG.getConstant( 2678 APInt::getNullValue( 2679 N0.getValueType().getScalarType().getSizeInBits()), 2680 N0.getValueType()); 2681 if (ISD::isBuildVectorAllZeros(N1.getNode())) 2682 // do not return N1, because undef node may exist in N1 2683 return DAG.getConstant( 2684 APInt::getNullValue( 2685 N1.getValueType().getScalarType().getSizeInBits()), 2686 N1.getValueType()); 2687 2688 // fold (and x, -1) -> x, vector edition 2689 if (ISD::isBuildVectorAllOnes(N0.getNode())) 2690 return N1; 2691 if (ISD::isBuildVectorAllOnes(N1.getNode())) 2692 return N0; 2693 } 2694 2695 // fold (and x, undef) -> 0 2696 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2697 return DAG.getConstant(0, VT); 2698 // fold (and c1, c2) -> c1&c2 2699 if (N0C && N1C) 2700 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 2701 // canonicalize constant to RHS 2702 if (N0C && !N1C) 2703 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 2704 // fold (and x, -1) -> x 2705 if (N1C && N1C->isAllOnesValue()) 2706 return N0; 2707 // if (and x, c) is known to be zero, return 0 2708 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2709 APInt::getAllOnesValue(BitWidth))) 2710 return DAG.getConstant(0, VT); 2711 // reassociate and 2712 SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); 2713 if (RAND.getNode()) 2714 return RAND; 2715 // fold (and (or x, C), D) -> D if (C & D) == D 2716 if (N1C && N0.getOpcode() == ISD::OR) 2717 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2718 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2719 return N1; 2720 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 
2721 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2722 SDValue N0Op0 = N0.getOperand(0); 2723 APInt Mask = ~N1C->getAPIntValue(); 2724 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2725 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2726 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 2727 N0.getValueType(), N0Op0); 2728 2729 // Replace uses of the AND with uses of the Zero extend node. 2730 CombineTo(N, Zext); 2731 2732 // We actually want to replace all uses of the any_extend with the 2733 // zero_extend, to avoid duplicating things. This will later cause this 2734 // AND to be folded. 2735 CombineTo(N0.getNode(), Zext); 2736 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2737 } 2738 } 2739 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2740 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2741 // already be zero by virtue of the width of the base type of the load. 2742 // 2743 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2744 // more cases. 2745 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2746 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2747 N0.getOpcode() == ISD::LOAD) { 2748 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2749 N0 : N0.getOperand(0) ); 2750 2751 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2752 // This can be a pure constant or a vector splat, in which case we treat the 2753 // vector as a scalar and use the splat value. 
2754 APInt Constant = APInt::getNullValue(1); 2755 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2756 Constant = C->getAPIntValue(); 2757 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2758 APInt SplatValue, SplatUndef; 2759 unsigned SplatBitSize; 2760 bool HasAnyUndefs; 2761 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2762 SplatBitSize, HasAnyUndefs); 2763 if (IsSplat) { 2764 // Undef bits can contribute to a possible optimisation if set, so 2765 // set them. 2766 SplatValue |= SplatUndef; 2767 2768 // The splat value may be something like "0x00FFFFFF", which means 0 for 2769 // the first vector value and FF for the rest, repeating. We need a mask 2770 // that will apply equally to all members of the vector, so AND all the 2771 // lanes of the constant together. 2772 EVT VT = Vector->getValueType(0); 2773 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2774 2775 // If the splat value has been compressed to a bitlength lower 2776 // than the size of the vector lane, we need to re-expand it to 2777 // the lane size. 2778 if (BitWidth > SplatBitSize) 2779 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 2780 SplatBitSize < BitWidth; 2781 SplatBitSize = SplatBitSize * 2) 2782 SplatValue |= SplatValue.shl(SplatBitSize); 2783
// NOTE(review): the extraction is corrupted at this point — original lines
// 2784-2793 (the tail of the build-vector splat-constant handling in
// visitAND, which derives `Constant` from `SplatValue`) are missing and must
// be restored from the upstream source before this region can compile.
2794 } 2795 } 2796 2797 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2798 // actually legal and isn't going to get expanded, else this is a false 2799 // optimisation. 2800 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2801 Load->getValueType(0), 2802 Load->getMemoryVT()); 2803 2804 // Resize the constant to the same size as the original memory access before 2805 // extension. If it is still the AllOnesValue then this AND is completely 2806 // unneeded. 2807 Constant = 2808 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2809 2810 bool B; 2811 switch (Load->getExtensionType()) { 2812 default: B = false; break; 2813 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2814 case ISD::ZEXTLOAD: 2815 case ISD::NON_EXTLOAD: B = true; break; 2816 } 2817 2818 if (B && Constant.isAllOnesValue()) { 2819 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2820 // preserve semantics once we get rid of the AND. 2821 SDValue NewLoad(Load, 0); 2822 if (Load->getExtensionType() == ISD::EXTLOAD) { 2823 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2824 Load->getValueType(0), SDLoc(Load), 2825 Load->getChain(), Load->getBasePtr(), 2826 Load->getOffset(), Load->getMemoryVT(), 2827 Load->getMemOperand()); 2828 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2829 if (Load->getNumValues() == 3) { 2830 // PRE/POST_INC loads have 3 values. 2831 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2832 NewLoad.getValue(2) }; 2833 CombineTo(Load, To, 3, true); 2834 } else { 2835 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2836 } 2837 } 2838 2839 // Fold the AND away, taking care not to fold to the old load node if we 2840 // replaced it. 2841 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2842 2843 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2844 } 2845 } 2846 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2847 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2848 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2849 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2850 2851 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2852 LL.getValueType().isInteger()) { 2853 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2854 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2855 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2856 LR.getValueType(), LL, RL); 2857 AddToWorklist(ORNode.getNode()); 2858 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2859 } 2860 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2861 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2862 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2863 LR.getValueType(), LL, RL); 2864 AddToWorklist(ANDNode.getNode()); 2865 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 2866 } 2867 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2868 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2869 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2870 LR.getValueType(), LL, RL); 2871 AddToWorklist(ORNode.getNode()); 2872 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2873 } 2874 } 2875 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2876 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2877 Op0 == Op1 && LL.getValueType().isInteger() && 2878 Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && 2879 cast<ConstantSDNode>(RR)->isAllOnesValue()) || 2880 (cast<ConstantSDNode>(LR)->isAllOnesValue() && 2881 cast<ConstantSDNode>(RR)->isNullValue()))) { 2882 SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), 2883 LL, DAG.getConstant(1, LL.getValueType())); 2884 AddToWorklist(ADDNode.getNode()); 2885 
return DAG.getSetCC(SDLoc(N), VT, ADDNode, 2886 DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); 2887 } 2888 // canonicalize equivalent to ll == rl 2889 if (LL == RR && LR == RL) { 2890 Op1 = ISD::getSetCCSwappedOperands(Op1); 2891 std::swap(RL, RR); 2892 } 2893 if (LL == RL && LR == RR) { 2894 bool isInteger = LL.getValueType().isInteger(); 2895 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2896 if (Result != ISD::SETCC_INVALID && 2897 (!LegalOperations || 2898 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2899 TLI.isOperationLegal(ISD::SETCC, 2900 getSetCCResultType(N0.getSimpleValueType()))))) 2901 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 2902 LL, LR, Result); 2903 } 2904 } 2905 2906 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2907 if (N0.getOpcode() == N1.getOpcode()) { 2908 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2909 if (Tmp.getNode()) return Tmp; 2910 } 2911 2912 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2913 // fold (and (sra)) -> (and (srl)) when possible. 2914 if (!VT.isVector() && 2915 SimplifyDemandedBits(SDValue(N, 0))) 2916 return SDValue(N, 0); 2917 2918 // fold (zext_inreg (extload x)) -> (zextload x) 2919 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2920 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2921 EVT MemVT = LN0->getMemoryVT(); 2922 // If we zero all the possible extended bits, then we can turn this into 2923 // a zextload if we are running before legalize or the operation is legal. 
2924 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2925 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2926 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2927 ((!LegalOperations && !LN0->isVolatile()) || 2928 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 2929 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2930 LN0->getChain(), LN0->getBasePtr(), 2931 MemVT, LN0->getMemOperand()); 2932 AddToWorklist(N); 2933 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2934 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2935 } 2936 } 2937 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2938 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2939 N0.hasOneUse()) { 2940 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2941 EVT MemVT = LN0->getMemoryVT(); 2942 // If we zero all the possible extended bits, then we can turn this into 2943 // a zextload if we are running before legalize or the operation is legal. 2944 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2945 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2946 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2947 ((!LegalOperations && !LN0->isVolatile()) || 2948 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 2949 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2950 LN0->getChain(), LN0->getBasePtr(), 2951 MemVT, LN0->getMemOperand()); 2952 AddToWorklist(N); 2953 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2954 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2955 } 2956 } 2957 2958 // fold (and (load x), 255) -> (zextload x, i8) 2959 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2960 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2961 if (N1C && (N0.getOpcode() == ISD::LOAD || 2962 (N0.getOpcode() == ISD::ANY_EXTEND && 2963 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2964 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2965 LoadSDNode *LN0 = HasAnyExt 2966 ? cast<LoadSDNode>(N0.getOperand(0)) 2967 : cast<LoadSDNode>(N0); 2968 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2969 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 2970 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2971 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2972 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2973 EVT LoadedVT = LN0->getMemoryVT(); 2974 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2975 2976 if (ExtVT == LoadedVT && 2977 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2978 ExtVT))) { 2979 2980 SDValue NewLoad = 2981 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 2982 LN0->getChain(), LN0->getBasePtr(), ExtVT, 2983 LN0->getMemOperand()); 2984 AddToWorklist(N); 2985 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2986 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2987 } 2988 2989 // Do not change the width of a volatile load. 2990 // Do not generate loads of non-round integer types since these can 2991 // be expensive (and would be wrong if the type is not byte sized). 
2992 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2993 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2994 ExtVT))) { 2995 EVT PtrType = LN0->getOperand(1).getValueType(); 2996 2997 unsigned Alignment = LN0->getAlignment(); 2998 SDValue NewPtr = LN0->getBasePtr(); 2999 3000 // For big endian targets, we need to add an offset to the pointer 3001 // to load the correct bytes. For little endian systems, we merely 3002 // need to read fewer bytes from the same pointer. 3003 if (TLI.isBigEndian()) { 3004 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 3005 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 3006 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 3007 NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, 3008 NewPtr, DAG.getConstant(PtrOff, PtrType)); 3009 Alignment = MinAlign(Alignment, PtrOff); 3010 } 3011 3012 AddToWorklist(NewPtr.getNode()); 3013 3014 SDValue Load = 3015 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3016 LN0->getChain(), NewPtr, 3017 LN0->getPointerInfo(), 3018 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 3019 LN0->isInvariant(), Alignment, LN0->getAAInfo()); 3020 AddToWorklist(N); 3021 CombineTo(LN0, Load, Load.getValue(1)); 3022 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3023 } 3024 } 3025 } 3026 } 3027 3028 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 3029 VT.getSizeInBits() <= 64) { 3030 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 3031 APInt ADDC = ADDI->getAPIntValue(); 3032 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 3033 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 3034 // immediate for an add, but it is legal if its top c2 bits are set, 3035 // transform the ADD so the immediate doesn't need to be materialized 3036 // in a register. 
        // The SRL amount tells us which high bits of the OR's result are
        // guaranteed zero; we may fold that many high bits into the ADD
        // immediate without changing the AND's result.
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLI->getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, SDLoc(N0), VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, VT));
              CombineTo(N0.getNode(), NewAdd);
              return SDValue(N, 0); // Return N so it doesn't get rechecked!
            }
          }
        }
      }
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return BSwap;
  }

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// N is the OR node being combined; N0/N1 are its two operands (possibly
/// swapped below to canonicalize the SHL side into N0). If DemandHighBits is
/// true, all bits above the low 16 of the result must be provably zero for
/// the match to be valid.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // This transform only produces already-legal nodes, so it is restricted to
  // post-type-legalization combining.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00).
  // LookPassAnd0/1 record that the AND mask was matched (and stripped) on the
  // SHL / SRL side respectively, so the demanded-bits check below knows the
  // high bits were masked off.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize so the SHL side ends up in N0 and the SRL side in N1.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be the constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) -- the masks
  // may also sit inside the shifts rather than outside.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must swap the bytes of the same value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the
  // SRL 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a), shifted right so the swapped halfword lands in the low
  // 16 bits for types wider than i16.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16)
    Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
///
/// On success, records the source node for byte lane Num in Parts[Num] (Num
/// derived from the mask constant) and returns true. A lane may only be
/// matched once; a second match for the same lane fails.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!N1C)
    return false;

  // Map the mask constant to the byte lane it selects.
  unsigned Num;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       Num = 0; break;
  case 0xFF00:     Num = 1; break;
  case 0xFF0000:   Num = 2; break;
  case 0xFF000000: Num = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  SDValue N0 = N.getOperand(0);
  if (Opc == ISD::AND) {
    if (Num == 0 || Num == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (N0.getOpcode() != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (N0.getOpcode() != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (Num != 0 && Num != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (Num != 1 && Num != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Each byte lane may only be produced once.
  if (Parts[Num])
    return false;

  Parts[Num] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap.
That is 3245/// ((x & 0x000000ff) << 8) | 3246/// ((x & 0x0000ff00) >> 8) | 3247/// ((x & 0x00ff0000) << 8) | 3248/// ((x & 0xff000000) >> 8) 3249/// => (rotl (bswap x), 16) 3250SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 3251 if (!LegalOperations) 3252 return SDValue(); 3253 3254 EVT VT = N->getValueType(0); 3255 if (VT != MVT::i32) 3256 return SDValue(); 3257 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3258 return SDValue(); 3259 3260 // Look for either 3261 // (or (or (and), (and)), (or (and), (and))) 3262 // (or (or (or (and), (and)), (and)), (and)) 3263 if (N0.getOpcode() != ISD::OR) 3264 return SDValue(); 3265 SDValue N00 = N0.getOperand(0); 3266 SDValue N01 = N0.getOperand(1); 3267 SDNode *Parts[4] = {}; 3268 3269 if (N1.getOpcode() == ISD::OR && 3270 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { 3271 // (or (or (and), (and)), (or (and), (and))) 3272 SDValue N000 = N00.getOperand(0); 3273 if (!isBSwapHWordElement(N000, Parts)) 3274 return SDValue(); 3275 3276 SDValue N001 = N00.getOperand(1); 3277 if (!isBSwapHWordElement(N001, Parts)) 3278 return SDValue(); 3279 SDValue N010 = N01.getOperand(0); 3280 if (!isBSwapHWordElement(N010, Parts)) 3281 return SDValue(); 3282 SDValue N011 = N01.getOperand(1); 3283 if (!isBSwapHWordElement(N011, Parts)) 3284 return SDValue(); 3285 } else { 3286 // (or (or (or (and), (and)), (and)), (and)) 3287 if (!isBSwapHWordElement(N1, Parts)) 3288 return SDValue(); 3289 if (!isBSwapHWordElement(N01, Parts)) 3290 return SDValue(); 3291 if (N00.getOpcode() != ISD::OR) 3292 return SDValue(); 3293 SDValue N000 = N00.getOperand(0); 3294 if (!isBSwapHWordElement(N000, Parts)) 3295 return SDValue(); 3296 SDValue N001 = N00.getOperand(1); 3297 if (!isBSwapHWordElement(N001, Parts)) 3298 return SDValue(); 3299 } 3300 3301 // Make sure the parts are all coming from the same node. 
3302 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 3303 return SDValue(); 3304 3305 SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, 3306 SDValue(Parts[0],0)); 3307 3308 // Result of the bswap should be rotated by 16. If it's not legal, then 3309 // do (x << 16) | (x >> 16). 3310 SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); 3311 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 3312 return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); 3313 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 3314 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); 3315 return DAG.getNode(ISD::OR, SDLoc(N), VT, 3316 DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), 3317 DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); 3318} 3319 3320SDValue DAGCombiner::visitOR(SDNode *N) { 3321 SDValue N0 = N->getOperand(0); 3322 SDValue N1 = N->getOperand(1); 3323 SDValue LL, LR, RL, RR, CC0, CC1; 3324 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3325 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3326 EVT VT = N1.getValueType(); 3327 3328 // fold vector ops 3329 if (VT.isVector()) { 3330 SDValue FoldedVOp = SimplifyVBinOp(N); 3331 if (FoldedVOp.getNode()) return FoldedVOp; 3332 3333 // fold (or x, 0) -> x, vector edition 3334 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3335 return N1; 3336 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3337 return N0; 3338 3339 // fold (or x, -1) -> -1, vector edition 3340 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3341 // do not return N0, because undef node may exist in N0 3342 return DAG.getConstant( 3343 APInt::getAllOnesValue( 3344 N0.getValueType().getScalarType().getSizeInBits()), 3345 N0.getValueType()); 3346 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3347 // do not return N1, because undef node may exist in N1 3348 return DAG.getConstant( 3349 APInt::getAllOnesValue( 3350 N1.getValueType().getScalarType().getSizeInBits()), 3351 N1.getValueType()); 3352 3353 // fold (or (shuf A, 
V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) 3354 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) 3355 // Do this only if the resulting shuffle is legal. 3356 if (isa<ShuffleVectorSDNode>(N0) && 3357 isa<ShuffleVectorSDNode>(N1) && 3358 // Avoid folding a node with illegal type. 3359 TLI.isTypeLegal(VT) && 3360 N0->getOperand(1) == N1->getOperand(1) && 3361 ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { 3362 bool CanFold = true; 3363 unsigned NumElts = VT.getVectorNumElements(); 3364 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); 3365 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); 3366 // We construct two shuffle masks: 3367 // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand 3368 // and N1 as the second operand. 3369 // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand 3370 // and N0 as the second operand. 3371 // We do this because OR is commutable and therefore there might be 3372 // two ways to fold this node into a shuffle. 3373 SmallVector<int,4> Mask1; 3374 SmallVector<int,4> Mask2; 3375 3376 for (unsigned i = 0; i != NumElts && CanFold; ++i) { 3377 int M0 = SV0->getMaskElt(i); 3378 int M1 = SV1->getMaskElt(i); 3379 3380 // Both shuffle indexes are undef. Propagate Undef. 3381 if (M0 < 0 && M1 < 0) { 3382 Mask1.push_back(M0); 3383 Mask2.push_back(M0); 3384 continue; 3385 } 3386 3387 if (M0 < 0 || M1 < 0 || 3388 (M0 < (int)NumElts && M1 < (int)NumElts) || 3389 (M0 >= (int)NumElts && M1 >= (int)NumElts)) { 3390 CanFold = false; 3391 break; 3392 } 3393 3394 Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); 3395 Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); 3396 } 3397 3398 if (CanFold) { 3399 // Fold this sequence only if the resulting shuffle is 'legal'. 
3400 if (TLI.isShuffleMaskLegal(Mask1, VT)) 3401 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), 3402 N1->getOperand(0), &Mask1[0]); 3403 if (TLI.isShuffleMaskLegal(Mask2, VT)) 3404 return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), 3405 N0->getOperand(0), &Mask2[0]); 3406 } 3407 } 3408 } 3409 3410 // fold (or x, undef) -> -1 3411 if (!LegalOperations && 3412 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { 3413 EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; 3414 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); 3415 } 3416 // fold (or c1, c2) -> c1|c2 3417 if (N0C && N1C) 3418 return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); 3419 // canonicalize constant to RHS 3420 if (N0C && !N1C) 3421 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); 3422 // fold (or x, 0) -> x 3423 if (N1C && N1C->isNullValue()) 3424 return N0; 3425 // fold (or x, -1) -> -1 3426 if (N1C && N1C->isAllOnesValue()) 3427 return N1; 3428 // fold (or x, c) -> c iff (x & ~c) == 0 3429 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 3430 return N1; 3431 3432 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 3433 SDValue BSwap = MatchBSwapHWord(N, N0, N1); 3434 if (BSwap.getNode()) 3435 return BSwap; 3436 BSwap = MatchBSwapHWordLow(N, N0, N1); 3437 if (BSwap.getNode()) 3438 return BSwap; 3439 3440 // reassociate or 3441 SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); 3442 if (ROR.getNode()) 3443 return ROR; 3444 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 3445 // iff (c1 & c2) == 0. 
3446 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3447 isa<ConstantSDNode>(N0.getOperand(1))) { 3448 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 3449 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { 3450 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) 3451 return DAG.getNode( 3452 ISD::AND, SDLoc(N), VT, 3453 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); 3454 return SDValue(); 3455 } 3456 } 3457 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 3458 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 3459 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 3460 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 3461 3462 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 3463 LL.getValueType().isInteger()) { 3464 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 3465 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 3466 if (cast<ConstantSDNode>(LR)->isNullValue() && 3467 (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 3468 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), 3469 LR.getValueType(), LL, RL); 3470 AddToWorklist(ORNode.getNode()); 3471 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 3472 } 3473 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 3474 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 3475 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && 3476 (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 3477 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), 3478 LR.getValueType(), LL, RL); 3479 AddToWorklist(ANDNode.getNode()); 3480 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 3481 } 3482 } 3483 // canonicalize equivalent to ll == rl 3484 if (LL == RR && LR == RL) { 3485 Op1 = ISD::getSetCCSwappedOperands(Op1); 3486 std::swap(RL, RR); 3487 } 3488 if (LL == RL && LR == RR) { 3489 bool isInteger = LL.getValueType().isInteger(); 3490 ISD::CondCode Result = 
ISD::getSetCCOrOperation(Op0, Op1, isInteger); 3491 if (Result != ISD::SETCC_INVALID && 3492 (!LegalOperations || 3493 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 3494 TLI.isOperationLegal(ISD::SETCC, 3495 getSetCCResultType(N0.getValueType()))))) 3496 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 3497 LL, LR, Result); 3498 } 3499 } 3500 3501 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 3502 if (N0.getOpcode() == N1.getOpcode()) { 3503 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 3504 if (Tmp.getNode()) return Tmp; 3505 } 3506 3507 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 3508 if (N0.getOpcode() == ISD::AND && 3509 N1.getOpcode() == ISD::AND && 3510 N0.getOperand(1).getOpcode() == ISD::Constant && 3511 N1.getOperand(1).getOpcode() == ISD::Constant && 3512 // Don't increase # computations. 3513 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3514 // We can only do this xform if we know that bits from X that are set in C2 3515 // but not in C1 are already zero. Likewise for Y. 3516 const APInt &LHSMask = 3517 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 3518 const APInt &RHSMask = 3519 cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); 3520 3521 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 3522 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 3523 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3524 N0.getOperand(0), N1.getOperand(0)); 3525 return DAG.getNode(ISD::AND, SDLoc(N), VT, X, 3526 DAG.getConstant(LHSMask | RHSMask, VT)); 3527 } 3528 } 3529 3530 // See if this is some rotate idiom. 3531 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 3532 return SDValue(Rot, 0); 3533 3534 // Simplify the operands using demanded-bits information. 3535 if (!VT.isVector() && 3536 SimplifyDemandedBits(SDValue(N, 0))) 3537 return SDValue(N, 0); 3538 3539 return SDValue(); 3540} 3541 3542/// Match "(X shl/srl V1) & V2" where V2 may not be present. 
3543static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3544 if (Op.getOpcode() == ISD::AND) { 3545 if (isa<ConstantSDNode>(Op.getOperand(1))) { 3546 Mask = Op.getOperand(1); 3547 Op = Op.getOperand(0); 3548 } else { 3549 return false; 3550 } 3551 } 3552 3553 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3554 Shift = Op; 3555 return true; 3556 } 3557 3558 return false; 3559} 3560 3561// Return true if we can prove that, whenever Neg and Pos are both in the 3562// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that 3563// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3564// 3565// (or (shift1 X, Neg), (shift2 X, Pos)) 3566// 3567// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3568// in direction shift1 by Neg. The range [0, OpSize) means that we only need 3569// to consider shift amounts with defined behavior. 3570static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { 3571 // If OpSize is a power of 2 then: 3572 // 3573 // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) 3574 // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). 3575 // 3576 // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check 3577 // for the stronger condition: 3578 // 3579 // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] 3580 // 3581 // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) 3582 // we can just replace Neg with Neg' for the rest of the function. 3583 // 3584 // In other cases we check for the even stronger condition: 3585 // 3586 // Neg == OpSize - Pos [B] 3587 // 3588 // for all Neg and Pos. Note that the (or ...) then invokes undefined 3589 // behavior if Pos == 0 (and consequently Neg == OpSize). 
3590 // 3591 // We could actually use [A] whenever OpSize is a power of 2, but the 3592 // only extra cases that it would match are those uninteresting ones 3593 // where Neg and Pos are never in range at the same time. E.g. for 3594 // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3595 // as well as (sub 32, Pos), but: 3596 // 3597 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3598 // 3599 // always invokes undefined behavior for 32-bit X. 3600 // 3601 // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. 3602 unsigned MaskLoBits = 0; 3603 if (Neg.getOpcode() == ISD::AND && 3604 isPowerOf2_64(OpSize) && 3605 Neg.getOperand(1).getOpcode() == ISD::Constant && 3606 cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { 3607 Neg = Neg.getOperand(0); 3608 MaskLoBits = Log2_64(OpSize); 3609 } 3610 3611 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 3612 if (Neg.getOpcode() != ISD::SUB) 3613 return 0; 3614 ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); 3615 if (!NegC) 3616 return 0; 3617 SDValue NegOp1 = Neg.getOperand(1); 3618 3619 // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with 3620 // Pos'. The truncation is redundant for the purpose of the equality. 3621 if (MaskLoBits && 3622 Pos.getOpcode() == ISD::AND && 3623 Pos.getOperand(1).getOpcode() == ISD::Constant && 3624 cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) 3625 Pos = Pos.getOperand(0); 3626 3627 // The condition we need is now: 3628 // 3629 // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask 3630 // 3631 // If NegOp1 == Pos then we need: 3632 // 3633 // OpSize & Mask == NegC & Mask 3634 // 3635 // (because "x & Mask" is a truncation and distributes through subtraction). 3636 APInt Width; 3637 if (Pos == NegOp1) 3638 Width = NegC->getAPIntValue(); 3639 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
3640 // Then the condition we want to prove becomes: 3641 // 3642 // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask 3643 // 3644 // which, again because "x & Mask" is a truncation, becomes: 3645 // 3646 // NegC & Mask == (OpSize - PosC) & Mask 3647 // OpSize & Mask == (NegC + PosC) & Mask 3648 else if (Pos.getOpcode() == ISD::ADD && 3649 Pos.getOperand(0) == NegOp1 && 3650 Pos.getOperand(1).getOpcode() == ISD::Constant) 3651 Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + 3652 NegC->getAPIntValue()); 3653 else 3654 return false; 3655 3656 // Now we just need to check that OpSize & Mask == Width & Mask. 3657 if (MaskLoBits) 3658 // Opsize & Mask is 0 since Mask is Opsize - 1. 3659 return Width.getLoBits(MaskLoBits) == 0; 3660 return Width == OpSize; 3661} 3662 3663// A subroutine of MatchRotate used once we have found an OR of two opposite 3664// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces 3665// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3666// former being preferred if supported. InnerPos and InnerNeg are Pos and 3667// Neg with outer conversions stripped away. 3668SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3669 SDValue Neg, SDValue InnerPos, 3670 SDValue InnerNeg, unsigned PosOpcode, 3671 unsigned NegOpcode, SDLoc DL) { 3672 // fold (or (shl x, (*ext y)), 3673 // (srl x, (*ext (sub 32, y)))) -> 3674 // (rotl x, y) or (rotr x, (sub 32, y)) 3675 // 3676 // fold (or (shl x, (*ext (sub 32, y))), 3677 // (srl x, (*ext y))) -> 3678 // (rotr x, y) or (rotl x, (sub 32, y)) 3679 EVT VT = Shifted.getValueType(); 3680 if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { 3681 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 3682 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 3683 HasPos ? Pos : Neg).getNode(); 3684 } 3685 3686 return nullptr; 3687} 3688 3689// MatchRotate - Handle an 'or' of two operands. 
If this is one of the many 3690// idioms for rotate, and if the target supports rotation instructions, generate 3691// a rot[lr]. 3692SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { 3693 // Must be a legal type. Expanded 'n promoted things won't work with rotates. 3694 EVT VT = LHS.getValueType(); 3695 if (!TLI.isTypeLegal(VT)) return nullptr; 3696 3697 // The target must have at least one rotate flavor. 3698 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); 3699 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); 3700 if (!HasROTL && !HasROTR) return nullptr; 3701 3702 // Match "(X shl/srl V1) & V2" where V2 may not be present. 3703 SDValue LHSShift; // The shift. 3704 SDValue LHSMask; // AND value if any. 3705 if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) 3706 return nullptr; // Not part of a rotate. 3707 3708 SDValue RHSShift; // The shift. 3709 SDValue RHSMask; // AND value if any. 3710 if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) 3711 return nullptr; // Not part of a rotate. 3712 3713 if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) 3714 return nullptr; // Not shifting the same value. 3715 3716 if (LHSShift.getOpcode() == RHSShift.getOpcode()) 3717 return nullptr; // Shifts must disagree. 3718 3719 // Canonicalize shl to left side in a shl/srl pair. 
3720 if (RHSShift.getOpcode() == ISD::SHL) { 3721 std::swap(LHS, RHS); 3722 std::swap(LHSShift, RHSShift); 3723 std::swap(LHSMask , RHSMask ); 3724 } 3725 3726 unsigned OpSizeInBits = VT.getSizeInBits(); 3727 SDValue LHSShiftArg = LHSShift.getOperand(0); 3728 SDValue LHSShiftAmt = LHSShift.getOperand(1); 3729 SDValue RHSShiftArg = RHSShift.getOperand(0); 3730 SDValue RHSShiftAmt = RHSShift.getOperand(1); 3731 3732 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) 3733 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) 3734 if (LHSShiftAmt.getOpcode() == ISD::Constant && 3735 RHSShiftAmt.getOpcode() == ISD::Constant) { 3736 uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); 3737 uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); 3738 if ((LShVal + RShVal) != OpSizeInBits) 3739 return nullptr; 3740 3741 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, 3742 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); 3743 3744 // If there is an AND of either shifted operand, apply it to the result. 3745 if (LHSMask.getNode() || RHSMask.getNode()) { 3746 APInt Mask = APInt::getAllOnesValue(OpSizeInBits); 3747 3748 if (LHSMask.getNode()) { 3749 APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); 3750 Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; 3751 } 3752 if (RHSMask.getNode()) { 3753 APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); 3754 Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; 3755 } 3756 3757 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); 3758 } 3759 3760 return Rot.getNode(); 3761 } 3762 3763 // If there is a mask here, and we have a variable shift, we can't be sure 3764 // that we're masking out the right stuff. 3765 if (LHSMask.getNode() || RHSMask.getNode()) 3766 return nullptr; 3767 3768 // If the shift amount is sign/zext/any-extended just peel it off. 
3769 SDValue LExtOp0 = LHSShiftAmt; 3770 SDValue RExtOp0 = RHSShiftAmt; 3771 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 3772 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 3773 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 3774 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && 3775 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 3776 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 3777 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 3778 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { 3779 LExtOp0 = LHSShiftAmt.getOperand(0); 3780 RExtOp0 = RHSShiftAmt.getOperand(0); 3781 } 3782 3783 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, 3784 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); 3785 if (TryL) 3786 return TryL; 3787 3788 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, 3789 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); 3790 if (TryR) 3791 return TryR; 3792 3793 return nullptr; 3794} 3795 3796SDValue DAGCombiner::visitXOR(SDNode *N) { 3797 SDValue N0 = N->getOperand(0); 3798 SDValue N1 = N->getOperand(1); 3799 SDValue LHS, RHS, CC; 3800 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3801 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3802 EVT VT = N0.getValueType(); 3803 3804 // fold vector ops 3805 if (VT.isVector()) { 3806 SDValue FoldedVOp = SimplifyVBinOp(N); 3807 if (FoldedVOp.getNode()) return FoldedVOp; 3808 3809 // fold (xor x, 0) -> x, vector edition 3810 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3811 return N1; 3812 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3813 return N0; 3814 } 3815 3816 // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
  // NOTE(review): this is the interior of DAGCombiner::visitXOR; N0/N1 are the
  // xor's operands, N0C/N1C their ConstantSDNode forms, and LHS/RHS/CC are
  // presumably scratch locals declared at the top of the function (not visible
  // here) — confirm against the function head.
  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS so later N1C-based folds only need one form.
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
  if (RXOR.getNode())
    return RXOR;

  // fold !(x cc y) -> (x !cc y): when xor'ing a setcc (or select_cc) result
  // with the target's "true" value, invert the condition code instead.
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only do this if the inverted condition is legal post-legalization.
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  // i.e. push the xor-with-1 inside the zext so the setcc inversion above can
  // fire on the narrower value.
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // (De Morgan, i1 only; also handles the dual and->or case).
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    // Note: these LHS/RHS shadow the outer locals of the same name.
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // (De Morgan again, for any width, keyed on xor with all-ones).
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// \param N    the SHL/SRA/SRL node being combined.
/// \param Amt  N's constant shift amount.
/// Pulls a binop (and/or/xor/add) through the shift when both the binop RHS
/// and the shift amount are non-opaque constants, producing
/// (binop (shift x, amt), (shift cst, amt)).
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // We can't and shouldn't fold opaque constants.
  if (Amt->isOpaque())
    return SDValue();

  // Only fold if the binop result has no other users, otherwise we duplicate
  // work.
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto the commute (e.g. addressing modes).
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  // Both operands are constants, so this is expected to constant-fold.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}

/// Distribute a truncate over an AND with a constant (or constant splat):
/// (truncate (and x, c)) -> (and (truncate x), trunc(c)).
/// Requires both the truncate and the AND to be single-use so no nodes are
/// duplicated. Returns a null SDValue when the pattern doesn't apply.
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
    SDValue N01 = N->getOperand(0).getOperand(1);

    if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
      EVT TruncVT = N->getValueType(0);
      SDValue N00 = N->getOperand(0).getOperand(0);
      APInt TruncC = N01C->getAPIntValue();
      // Narrow the mask constant to the truncated width.
      TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());

      return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
                         DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
                         DAG.getConstant(TruncC, TruncVT));
    }
  }

  return SDValue();
}

/// Combine ROTL/ROTR nodes.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
    if (NewOp1.getNode())
      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
                         N->getOperand(0), NewOp1);
  }
  return SDValue();
}

/// Combine SHL nodes. Folds are attempted in order; each early return
/// pre-empts the ones below it.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        // Treat a constant-splat vector amount like a scalar constant so the
        // N1C-guarded folds below apply to vectors too.
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // Combined shift of >= bitwidth shifts everything out.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // Outer shift must cover the bits added by the extension.
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, VT);
        return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        // Only fold when the shl undoes exactly the srl; the result is a
        // mask of the low bits which the zext can then be hoisted over.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
                                       NewOp0, DAG.getConstant(c2, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask of bits the srl left defined; adjusted below by the residual
        // shift direction.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, N1.getValueType()));
        }
        return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
                           DAG.getConstant(Mask, VT));
      }
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(BitSize,
                                            BitSize - N1C->getZExtValue()), VT);
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  APInt Val;
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isa<ConstantSDNode>(N0.getOperand(1)) ||
       isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // Last resort: the generic binop-through-shift transform.
  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C);
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}

/// Combine SRA nodes. Folds are attempted in order; each early return
/// pre-empts the ones below it.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // Constant-splat vector amounts participate in the N1C folds below.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> (sra c1, c2)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1
  if (N0C && N0C->isAllOnesValue())
    return N0;
  // fold (sra x, (setge c, size(x))) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // An arithmetic shift of >= bitwidth saturates to bitwidth-1 (keeps
      // replicating the sign bit) rather than producing undef.
      if (Sum >= OpSizeInBits)
        Sum = OpSizeInBits - 1;
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  //   -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constanst of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDValue Amt = DAG.getConstant(ShiftAmt,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Last resort: the generic binop-through-shift transform.
  if (N1C) {
    SDValue NewSRA = visitShiftByConstant(N, N1C);
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}

/// Combine SRL nodes. Folds are attempted in order; each early return
/// pre-empts the ones below it.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // Constant-splat vector amounts participate in the N1C folds below.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N01C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // Combined shift of >= bitwidth shifts everything out.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
                         DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    // Build the low-bits mask as a 64-bit shift of all-ones; only valid for
    // scalar widths up to 64.
    if (BitSize <= 64) {
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the (undef) bits the any_extend brought in.
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
                         DAG.getConstant(Mask, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      return DAG.getNode(ISD::XOR, SDLoc(N), VT,
                         Op, DAG.getConstant(1, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Generic binop-through-shift transform.
  if (N1C) {
    SDValue NewSRL = visitShiftByConstant(N, N1C);
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}

/// Combine CTLZ nodes: re-emitting the node on a constant operand lets the
/// DAG constant-fold it.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine CTLZ_ZERO_UNDEF nodes (constant-fold via getNode).
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine CTTZ nodes (constant-fold via getNode).
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine CTTZ_ZERO_UNDEF nodes (constant-fold via getNode).
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine CTPOP nodes (constant-fold via getNode).
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}
/// \brief Generate Min/Max node
/// Turns (select (setcc LHS, RHS, CC), True, False) into FMINNUM/FMAXNUM when
/// the select operands are the compare operands (in either order) and the
/// target supports the min/max opcode for \p VT. Returns null otherwise.
static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
                                   SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // The select must be choosing between exactly the compared values.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // "less-than" compare: taking the LHS on true means min, else max.
    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    // "greater-than" compare: taking the LHS on true means max, else min.
    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}

/// Combine SELECT nodes. Folds are attempted in order; each early return
/// pre-empts the ones below it.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 || (VT0.isInteger() &&
                          TLI.getBooleanContents(false, false) ==
                              TLI.getBooleanContents(false, true) &&
                          TLI.getBooleanContents(false, false) ==
                              TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Result type differs from the condition type: xor in the condition type,
    // then widen or narrow to VT.
    XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorklist(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath &&
        VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      SDValue FMinMax =
          combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
                              N1, N2, CC, TLI, DAG);
      if (FMinMax)
        return FMinMax;
    }

    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  return SDValue();
}

/// Split a vector setcc-style node (opcode taken from \p N) into lo/hi halves,
/// splitting both vector operands and the result type.
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  // Operand 2 (the condition code) is shared by both halves.
  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
4806 ConstantSDNode *BottomHalf = nullptr; 4807 for (int i = 0; i < NumElems / 2; ++i) { 4808 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4809 continue; 4810 4811 if (BottomHalf == nullptr) 4812 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4813 else if (Cond->getOperand(i).getNode() != BottomHalf) 4814 return SDValue(); 4815 } 4816 4817 // Do the same for the second half of the BuildVector 4818 ConstantSDNode *TopHalf = nullptr; 4819 for (int i = NumElems / 2; i < NumElems; ++i) { 4820 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4821 continue; 4822 4823 if (TopHalf == nullptr) 4824 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4825 else if (Cond->getOperand(i).getNode() != TopHalf) 4826 return SDValue(); 4827 } 4828 4829 assert(TopHalf && BottomHalf && 4830 "One half of the selector was all UNDEFs and the other was all the " 4831 "same value. This should have been addressed before this function."); 4832 return DAG.getNode( 4833 ISD::CONCAT_VECTORS, dl, VT, 4834 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), 4835 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); 4836} 4837 4838SDValue DAGCombiner::visitMSTORE(SDNode *N) { 4839 4840 if (Level >= AfterLegalizeTypes) 4841 return SDValue(); 4842 4843 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 4844 SDValue Mask = MST->getMask(); 4845 SDValue Data = MST->getValue(); 4846 SDLoc DL(N); 4847 4848 // If the MSTORE data type requires splitting and the mask is provided by a 4849 // SETCC, then split both nodes and its operands before legalization. This 4850 // prevents the type legalizer from unrolling SETCC into scalar comparisons 4851 // and enables future optimizations (e.g. min/max pattern matching on X86). 4852 if (Mask.getOpcode() == ISD::SETCC) { 4853 4854 // Check if any splitting is required. 
4855 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 4856 TargetLowering::TypeSplitVector) 4857 return SDValue(); 4858 4859 SDValue MaskLo, MaskHi, Lo, Hi; 4860 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 4861 4862 EVT LoVT, HiVT; 4863 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); 4864 4865 SDValue Chain = MST->getChain(); 4866 SDValue Ptr = MST->getBasePtr(); 4867 4868 EVT MemoryVT = MST->getMemoryVT(); 4869 unsigned Alignment = MST->getOriginalAlignment(); 4870 4871 // if Alignment is equal to the vector size, 4872 // take the half of it for the second part 4873 unsigned SecondHalfAlignment = 4874 (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 4875 Alignment/2 : Alignment; 4876 4877 EVT LoMemVT, HiMemVT; 4878 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 4879 4880 SDValue DataLo, DataHi; 4881 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 4882 4883 MachineMemOperand *MMO = DAG.getMachineFunction(). 4884 getMachineMemOperand(MST->getPointerInfo(), 4885 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 4886 Alignment, MST->getAAInfo(), MST->getRanges()); 4887 4888 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 4889 MST->isTruncatingStore()); 4890 4891 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 4892 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 4893 DAG.getConstant(IncrementSize, Ptr.getValueType())); 4894 4895 MMO = DAG.getMachineFunction(). 
4896 getMachineMemOperand(MST->getPointerInfo(), 4897 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), 4898 SecondHalfAlignment, MST->getAAInfo(), 4899 MST->getRanges()); 4900 4901 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 4902 MST->isTruncatingStore()); 4903 4904 AddToWorklist(Lo.getNode()); 4905 AddToWorklist(Hi.getNode()); 4906 4907 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 4908 } 4909 return SDValue(); 4910} 4911 4912SDValue DAGCombiner::visitMLOAD(SDNode *N) { 4913 4914 if (Level >= AfterLegalizeTypes) 4915 return SDValue(); 4916 4917 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 4918 SDValue Mask = MLD->getMask(); 4919 SDLoc DL(N); 4920 4921 // If the MLOAD result requires splitting and the mask is provided by a 4922 // SETCC, then split both nodes and its operands before legalization. This 4923 // prevents the type legalizer from unrolling SETCC into scalar comparisons 4924 // and enables future optimizations (e.g. min/max pattern matching on X86). 4925 4926 if (Mask.getOpcode() == ISD::SETCC) { 4927 EVT VT = N->getValueType(0); 4928 4929 // Check if any splitting is required. 4930 if (TLI.getTypeAction(*DAG.getContext(), VT) != 4931 TargetLowering::TypeSplitVector) 4932 return SDValue(); 4933 4934 SDValue MaskLo, MaskHi, Lo, Hi; 4935 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 4936 4937 SDValue Src0 = MLD->getSrc0(); 4938 SDValue Src0Lo, Src0Hi; 4939 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 4940 4941 EVT LoVT, HiVT; 4942 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 4943 4944 SDValue Chain = MLD->getChain(); 4945 SDValue Ptr = MLD->getBasePtr(); 4946 EVT MemoryVT = MLD->getMemoryVT(); 4947 unsigned Alignment = MLD->getOriginalAlignment(); 4948 4949 // if Alignment is equal to the vector size, 4950 // take the half of it for the second part 4951 unsigned SecondHalfAlignment = 4952 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
/// Combine an ISD::VSELECT node: canonicalize the integer-abs idiom, split
/// wide vselect-of-setcc before type legalization, and fold away constant
/// all-ones/all-zeros selectors.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);   // selector
  SDValue N1 = N->getOperand(1);   // value if selector lane is true
  SDValue N2 = N->getOperand(2);   // value if selector lane is false
  SDLoc DL(N);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match (x >= 0 ? x : -x) and (x > -1 ? x : -x): -x must be (sub 0, x).
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // Match the mirrored form (x <= 0 ? -x : x).
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      // Emit the branch-free abs sequence: Y = X >> (bits-1) (arithmetic),
      // result = (X + Y) ^ Y.
      EVT VT = LHS.getValueType();
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SDValue CV = ConvertSelectToConcatVector(N, DAG);
    if (CV.getNode())
      return CV;
  }

  return SDValue();
}
5073 if (N1.getOpcode() == ISD::CONCAT_VECTORS && 5074 N2.getOpcode() == ISD::CONCAT_VECTORS && 5075 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { 5076 SDValue CV = ConvertSelectToConcatVector(N, DAG); 5077 if (CV.getNode()) 5078 return CV; 5079 } 5080 5081 return SDValue(); 5082} 5083 5084SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 5085 SDValue N0 = N->getOperand(0); 5086 SDValue N1 = N->getOperand(1); 5087 SDValue N2 = N->getOperand(2); 5088 SDValue N3 = N->getOperand(3); 5089 SDValue N4 = N->getOperand(4); 5090 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 5091 5092 // fold select_cc lhs, rhs, x, x, cc -> x 5093 if (N2 == N3) 5094 return N2; 5095 5096 // Determine if the condition we're dealing with is constant 5097 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 5098 N0, N1, CC, SDLoc(N), false); 5099 if (SCC.getNode()) { 5100 AddToWorklist(SCC.getNode()); 5101 5102 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { 5103 if (!SCCC->isNullValue()) 5104 return N2; // cond always true -> true val 5105 else 5106 return N3; // cond always false -> false val 5107 } else if (SCC->getOpcode() == ISD::UNDEF) { 5108 // When the condition is UNDEF, just return the first operand. This is 5109 // coherent the DAG creation, no setcc node is created in this case 5110 return N2; 5111 } else if (SCC.getOpcode() == ISD::SETCC) { 5112 // Fold to a simpler select_cc 5113 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), 5114 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 5115 SCC.getOperand(2)); 5116 } 5117 } 5118 5119 // If we can fold this based on the true/false value, do so. 5120 if (SimplifySelectOps(N, N2, N3)) 5121 return SDValue(N, 0); // Don't revisit N. 
5122 5123 // fold select_cc into other things, such as min/max/abs 5124 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 5125} 5126 5127SDValue DAGCombiner::visitSETCC(SDNode *N) { 5128 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 5129 cast<CondCodeSDNode>(N->getOperand(2))->get(), 5130 SDLoc(N)); 5131} 5132 5133// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext 5134// dag node into a ConstantSDNode or a build_vector of constants. 5135// This function is called by the DAGCombiner when visiting sext/zext/aext 5136// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 5137// Vector extends are not folded if operations are legal; this is to 5138// avoid introducing illegal build_vector dag nodes. 5139static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 5140 SelectionDAG &DAG, bool LegalTypes, 5141 bool LegalOperations) { 5142 unsigned Opcode = N->getOpcode(); 5143 SDValue N0 = N->getOperand(0); 5144 EVT VT = N->getValueType(0); 5145 5146 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 5147 Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); 5148 5149 // fold (sext c1) -> c1 5150 // fold (zext c1) -> c1 5151 // fold (aext c1) -> c1 5152 if (isa<ConstantSDNode>(N0)) 5153 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); 5154 5155 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 5156 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 5157 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 5158 EVT SVT = VT.getScalarType(); 5159 if (!(VT.isVector() && 5160 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && 5161 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 5162 return nullptr; 5163 5164 // We can fold this node into a build_vector. 
5165 unsigned VTBits = SVT.getSizeInBits(); 5166 unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); 5167 unsigned ShAmt = VTBits - EVTBits; 5168 SmallVector<SDValue, 8> Elts; 5169 unsigned NumElts = N0->getNumOperands(); 5170 SDLoc DL(N); 5171 5172 for (unsigned i=0; i != NumElts; ++i) { 5173 SDValue Op = N0->getOperand(i); 5174 if (Op->getOpcode() == ISD::UNDEF) { 5175 Elts.push_back(DAG.getUNDEF(SVT)); 5176 continue; 5177 } 5178 5179 ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); 5180 const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); 5181 if (Opcode == ISD::SIGN_EXTEND) 5182 Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), 5183 SVT)); 5184 else 5185 Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), 5186 SVT)); 5187 } 5188 5189 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); 5190} 5191 5192// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 5193// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 5194// transformation. Returns true if extension are possible and the above 5195// mentioned transformation is profitable. 5196static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 5197 unsigned ExtOpc, 5198 SmallVectorImpl<SDNode *> &ExtendNodes, 5199 const TargetLowering &TLI) { 5200 bool HasCopyToRegUses = false; 5201 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 5202 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 5203 UE = N0.getNode()->use_end(); 5204 UI != UE; ++UI) { 5205 SDNode *User = *UI; 5206 if (User == N) 5207 continue; 5208 if (UI.getUse().getResNo() != N0.getResNo()) 5209 continue; 5210 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
5211 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 5212 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 5213 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 5214 // Sign bits will be lost after a zext. 5215 return false; 5216 bool Add = false; 5217 for (unsigned i = 0; i != 2; ++i) { 5218 SDValue UseOp = User->getOperand(i); 5219 if (UseOp == N0) 5220 continue; 5221 if (!isa<ConstantSDNode>(UseOp)) 5222 return false; 5223 Add = true; 5224 } 5225 if (Add) 5226 ExtendNodes.push_back(User); 5227 continue; 5228 } 5229 // If truncates aren't free and there are users we can't 5230 // extend, it isn't worthwhile. 5231 if (!isTruncFree) 5232 return false; 5233 // Remember if this value is live-out. 5234 if (User->getOpcode() == ISD::CopyToReg) 5235 HasCopyToRegUses = true; 5236 } 5237 5238 if (HasCopyToRegUses) { 5239 bool BothLiveOut = false; 5240 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5241 UI != UE; ++UI) { 5242 SDUse &Use = UI.getUse(); 5243 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 5244 BothLiveOut = true; 5245 break; 5246 } 5247 } 5248 if (BothLiveOut) 5249 // Both unextended and extended values are live out. There had better be 5250 // a good reason for the transformation. 5251 return ExtendNodes.size(); 5252 } 5253 return true; 5254} 5255 5256void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 5257 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 5258 ISD::NodeType ExtType) { 5259 // Extend SetCC uses if necessary. 
/// Combine an ISD::SIGN_EXTEND node. Tries, in order: constant folding,
/// collapsing nested extends, trunc/sext elimination, turning extended
/// loads into sextloads (extending their setcc users when profitable),
/// setcc-based select forms, and sext->zext when the sign bit is known zero.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (sext constant / build_vector-of-constants) directly.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended. If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Resize Op to VT first, then sign-extend in-register from the
      // truncated width.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // None of the supported targets knows how to perform load and sign extend
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only profitable if the setcc users can be
    // extended too (collected in SetCCs).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Other users of the load see a truncate of the extended load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): the `(!LegalOperations && TLI.isOperationLegal(...))`
  // conjunction differs from the usual `(!LegalOperations || ...)` pattern
  // used elsewhere in this function — confirm whether `&&` is intended.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the logic-op constant to the wider type as well.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
        TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;

    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        // Rebuild the compare at the legal setcc type and select -1/0.
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
                                     N0.getOperand(0), N0.getOperand(1), CC);
        return DAG.getSelect(DL, VT, SetCC,
                             NegOne, DAG.getConstant(0, VT));
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}
TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { 5474 SDLoc DL(N); 5475 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 5476 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, 5477 N0.getOperand(0), N0.getOperand(1), CC); 5478 return DAG.getSelect(DL, VT, SetCC, 5479 NegOne, DAG.getConstant(0, VT)); 5480 } 5481 } 5482 } 5483 5484 // fold (sext x) -> (zext x) if the sign bit is known zero. 5485 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 5486 DAG.SignBitIsZero(N0)) 5487 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); 5488 5489 return SDValue(); 5490} 5491 5492// isTruncateOf - If N is a truncate of some other value, return true, record 5493// the value being truncated in Op and which of Op's bits are zero in KnownZero. 5494// This function computes KnownZero to avoid a duplicated call to 5495// computeKnownBits in the caller. 5496static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, 5497 APInt &KnownZero) { 5498 APInt KnownOne; 5499 if (N->getOpcode() == ISD::TRUNCATE) { 5500 Op = N->getOperand(0); 5501 DAG.computeKnownBits(Op, KnownZero, KnownOne); 5502 return true; 5503 } 5504 5505 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || 5506 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) 5507 return false; 5508 5509 SDValue Op0 = N->getOperand(0); 5510 SDValue Op1 = N->getOperand(1); 5511 assert(Op0.getValueType() == Op1.getValueType()); 5512 5513 ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); 5514 ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); 5515 if (COp0 && COp0->isNullValue()) 5516 Op = Op1; 5517 else if (COp1 && COp1->isNullValue()) 5518 Op = Op0; 5519 else 5520 return false; 5521 5522 DAG.computeKnownBits(Op, KnownZero, KnownOne); 5523 5524 if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) 5525 return false; 5526 5527 return true; 5528} 5529 5530SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { 5531 SDValue N0 = N->getOperand(0); 5532 EVT VT 
= N->getValueType(0); 5533 5534 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 5535 LegalOperations)) 5536 return SDValue(Res, 0); 5537 5538 // fold (zext (zext x)) -> (zext x) 5539 // fold (zext (aext x)) -> (zext x) 5540 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 5541 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, 5542 N0.getOperand(0)); 5543 5544 // fold (zext (truncate x)) -> (zext x) or 5545 // (zext (truncate x)) -> (truncate x) 5546 // This is valid when the truncated bits of x are already zero. 5547 // FIXME: We should extend this to work for vectors too. 5548 SDValue Op; 5549 APInt KnownZero; 5550 if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { 5551 APInt TruncatedBits = 5552 (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 5553 APInt(Op.getValueSizeInBits(), 0) : 5554 APInt::getBitsSet(Op.getValueSizeInBits(), 5555 N0.getValueSizeInBits(), 5556 std::min(Op.getValueSizeInBits(), 5557 VT.getSizeInBits())); 5558 if (TruncatedBits == (KnownZero & TruncatedBits)) { 5559 if (VT.bitsGT(Op.getValueType())) 5560 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); 5561 if (VT.bitsLT(Op.getValueType())) 5562 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 5563 5564 return Op; 5565 } 5566 } 5567 5568 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 5569 // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) 5570 if (N0.getOpcode() == ISD::TRUNCATE) { 5571 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 5572 if (NarrowLoad.getNode()) { 5573 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 5574 if (NarrowLoad.getNode() != N0.getNode()) { 5575 CombineTo(N0.getNode(), NarrowLoad); 5576 // CombineTo deleted the truncate, if needed, but not what's under it. 5577 AddToWorklist(oye); 5578 } 5579 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {

    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // Widen (or narrow) the truncate's input to VT, then mask off the bits
    // above the originally-truncated width with a zero-extend-in-reg (AND).
    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    }
    // Zero-extending the mask keeps exactly the bits the zext would keep.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, the fold only pays off if the other users (setcc
    // nodes) can be extended as well.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Other users of the old load see a truncate of the new wide load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // NOTE(review): the '&&' below means this fires only *before* legalization
  // AND when the logic op is legal at VT; sibling folds gate with
  // '(!LegalOperations || isOperationLegal(...))' — confirm the conjunction
  // here is intentional.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    // A sextload can't be re-used as a zextload; indexed loads produce an
    // extra value we can't handle here.
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // If the vsetcc already produces the natural result type there is
      // nothing to improve here.
      if (getSetCCResultType(N0VT) == N0.getValueType())
        return SDValue();

      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter). Check to see that they are the same size. If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        // Mask the all-ones/all-zeros vsetcc result down to 0/1 lanes.
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
                                       OneOps));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // Sign-extend-or-truncate the lane-sized booleans, then AND with 1
      // so the final lanes are 0/1 as a zext requires.
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits = how many high bits of the inner zext are known zero.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    // (Only very wide results, >= 256 bits, need the shift amount widened.)
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

/// Visit an ISD::ANY_EXTEND node, trying the aext-specific folds below.
/// Returns the replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    // The AND mask pins the high bits to zero, so the anyext bits are defined.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Other users of the old load see a truncate of the new wide load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the extending load at the wider result type, preserving
      // whatever extension kind (zext/sext/ext) it already had.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return SDValue();
}

/// See if the specified operand can be simplified with the knowledge that only
/// the bits specified by Mask are used. If so, return the simpler operand,
/// otherwise return a null SDValue.
5954SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 5955 switch (V.getOpcode()) { 5956 default: break; 5957 case ISD::Constant: { 5958 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 5959 assert(CV && "Const value should be ConstSDNode."); 5960 const APInt &CVal = CV->getAPIntValue(); 5961 APInt NewVal = CVal & Mask; 5962 if (NewVal != CVal) 5963 return DAG.getConstant(NewVal, V.getValueType()); 5964 break; 5965 } 5966 case ISD::OR: 5967 case ISD::XOR: 5968 // If the LHS or RHS don't contribute bits to the or, drop them. 5969 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 5970 return V.getOperand(1); 5971 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 5972 return V.getOperand(0); 5973 break; 5974 case ISD::SRL: 5975 // Only look at single-use SRLs. 5976 if (!V.getNode()->hasOneUse()) 5977 break; 5978 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 5979 // See if we can recursively simplify the LHS. 5980 unsigned Amt = RHSC->getZExtValue(); 5981 5982 // Watch out for shift count overflow though. 5983 if (Amt >= Mask.getBitWidth()) break; 5984 APInt NewMask = Mask << Amt; 5985 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 5986 if (SimplifyLHS.getNode()) 5987 return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), 5988 SimplifyLHS, V.getOperand(1)); 5989 } 5990 } 5991 return SDValue(); 5992} 5993 5994/// If the result of a wider load is shifted to right of N bits and then 5995/// truncated to a narrower type and where N is a multiple of number of bits of 5996/// the narrower type, transform it to a narrower load from address + N / num of 5997/// bits of new type. If the result is to be extended, also fold the extension 5998/// to form a extending load. 
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    // Re-point N0 at N itself so the SRL handling below inspects this
    // node's own operands.
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes. If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it. Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain). Don't transform a pre-increment
  // load, for example, which produces an extra value. Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (TLI.isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Compute the byte offset and the (possibly reduced) alignment of the
  // narrower load.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, PtrType));
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->isVolatile(), LN0->isNonTemporal(),
                       LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, VT);
    else
      Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           Result, DAG.getConstant(ShLeftAmt, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

/// Visit an ISD::SIGN_EXTEND_INREG node, trying the sext_inreg-specific
/// folds below. Returns the replacement value, or a null SDValue.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: the local 'EVT' shadows the type name EVT — it is the narrow type
  // the value is being sign-extended *from* (carried in the VTSDNode operand).
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
  // into a build_vector.
  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SmallVector<SDValue, 8> Elts;
    unsigned NumElts = N0->getNumOperands();
    unsigned ShAmt = VTBits - EVTBits;

    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Op = N0->getOperand(i);
      if (Op->getOpcode() == ISD::UNDEF) {
        Elts.push_back(Op);
        continue;
      }

      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
      // shl then ashr by the same amount sign-extends the low EVTBits of
      // each constant element.
      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
                                     Op.getValueType()));
    }

    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
  }

  return SDValue();
}

/// Visit an ISD::TRUNCATE node, trying the truncate-specific folds below.
/// Returns the replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = TLI.isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      // if the source is smaller than the dest, we still need an extend
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                         N0.getOperand(0));
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      // if the source is larger than the dest, than we just need the truncate
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Re-view the source vector with SizeRatio-times-more elements of the
    // truncated element type; total bit size must be unchanged.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy();
      // On big-endian targets the low-order sub-element sits at the high
      // index within each original element.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
                              NVT, N0.getOperand(0));

      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                         SDLoc(N), TrTy, V,
                         DAG.getConstant(Index, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
6391 unsigned BuildVecNumElts = BuildVect.getNumOperands(); 6392 unsigned TruncVecNumElts = VT.getVectorNumElements(); 6393 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; 6394 6395 assert((BuildVecNumElts % TruncVecNumElts) == 0 && 6396 "Invalid number of elements"); 6397 6398 SmallVector<SDValue, 8> Opnds; 6399 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) 6400 Opnds.push_back(BuildVect.getOperand(i)); 6401 6402 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); 6403 } 6404 } 6405 6406 // See if we can simplify the input to this truncate through knowledge that 6407 // only the low bits are being used. 6408 // For example "trunc (or (shl x, 8), y)" // -> trunc y 6409 // Currently we only perform this optimization on scalars because vectors 6410 // may have different active low bits. 6411 if (!VT.isVector()) { 6412 SDValue Shorter = 6413 GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), 6414 VT.getSizeInBits())); 6415 if (Shorter.getNode()) 6416 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); 6417 } 6418 // fold (truncate (load x)) -> (smaller load x) 6419 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 6420 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 6421 SDValue Reduced = ReduceLoadWidth(N); 6422 if (Reduced.getNode()) 6423 return Reduced; 6424 // Handle the case where the load remains an extending load even 6425 // after truncation. 
6426 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { 6427 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6428 if (!LN0->isVolatile() && 6429 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { 6430 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), 6431 VT, LN0->getChain(), LN0->getBasePtr(), 6432 LN0->getMemoryVT(), 6433 LN0->getMemOperand()); 6434 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); 6435 return NewLoad; 6436 } 6437 } 6438 } 6439 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), 6440 // where ... are all 'undef'. 6441 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 6442 SmallVector<EVT, 8> VTs; 6443 SDValue V; 6444 unsigned Idx = 0; 6445 unsigned NumDefs = 0; 6446 6447 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 6448 SDValue X = N0.getOperand(i); 6449 if (X.getOpcode() != ISD::UNDEF) { 6450 V = X; 6451 Idx = i; 6452 NumDefs++; 6453 } 6454 // Stop if more than one members are non-undef. 6455 if (NumDefs > 1) 6456 break; 6457 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 6458 VT.getVectorElementType(), 6459 X.getValueType().getVectorNumElements())); 6460 } 6461 6462 if (NumDefs == 0) 6463 return DAG.getUNDEF(VT); 6464 6465 if (NumDefs == 1) { 6466 assert(V.getNode() && "The single defined operand is empty!"); 6467 SmallVector<SDValue, 8> Opnds; 6468 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 6469 if (i != Idx) { 6470 Opnds.push_back(DAG.getUNDEF(VTs[i])); 6471 continue; 6472 } 6473 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); 6474 AddToWorklist(NV.getNode()); 6475 Opnds.push_back(NV); 6476 } 6477 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); 6478 } 6479 } 6480 6481 // Simplify the operands using demanded-bits information. 
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // No truncate fold applied.
  return SDValue();
}

/// Return the node producing operand \p i of BUILD_PAIR node \p N, looking
/// through a MERGE_VALUES wrapper to the underlying value.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  // Both elements must be loads from the same address space, and the first
  // must be a plain (non-extending) load with a single use.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);

  if (ISD::isNON_EXTLoad(LD2) &&
      LD2->hasOneUse() &&
      // If both are volatile this would reduce the number of volatile loads.
      // If one is volatile it might be ok, but play conservative and bail out.
      !LD1->isVolatile() &&
      !LD2->isVolatile() &&
      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
    // Only form the wide load if LD1's alignment already satisfies the ABI
    // alignment of the wide type, and a wide LOAD is legal (or we are still
    // free to legalize it later).
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getPointerInfo(),
                         false, false, false, Align);
  }

  return SDValue();
}

/// Combine an ISD::BITCAST node: folds constants, collapses cast chains,
/// and rewrites casts of loads/FP sign-ops into cheaper integer forms.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // a fp -> int or int -> conversion and that the resulting operation will
    // be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT)))
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
      TLI.hasBigEndianPartOrdering(VT) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
    unsigned OrigAlign = LN0->getAlignment();

    if (Align <= OrigAlign) {
      SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
                                 LN0->getBasePtr(), LN0->getPointerInfo(),
                                 LN0->isVolatile(), LN0->isNonTemporal(),
                                 LN0->isInvariant(), OrigAlign,
                                 LN0->getAAInfo());
      // Keep chain users of the original load ordered after the new load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
                                  N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT,
                         NewConv, DAG.getConstant(SignBit, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       NewConv, DAG.getConstant(~SignBit, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                              IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        X = DAG.getNode(ISD::SRL, SDLoc(X),
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      // Isolate the sign bit taken from x ...
      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, VT));
      AddToWorklist(X.getNode());

      // ... and the magnitude bits of the constant, then merge the two.
      SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                                VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR) {
    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
    if (CombineLD.getNode())
      return CombineLD;
  }

  return SDValue();
}

/// Combine an ISD::BUILD_PAIR node: the only fold is merging two consecutive
/// loads into a single wide load.
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element. This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                     DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      SDValue Op = BV->getOperand(i);
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated. Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Otherwise, we're growing or shrinking the elements. To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector. If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = TLI.isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      // Pack NumInputsPerOutput narrow constants into one wide constant.
      // On little-endian targets the last narrow element carries the most
      // significant bits, hence the reversed operand order.
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // A wide element built entirely from undefs stays undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  bool isS2V = ISD::isScalarToVector(BV);
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Emit the low chunk first, shifting the remaining bits down each step.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                           Ops[0]);
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (TLI.isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}

/// Combine an ISD::FADD node: constant folding, canonicalization,
/// fneg/fsub rewrites, unsafe-math reassociation, and FADD->FMA fusion.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // NOTE(review): the '== 2' result of isNegatibleForFree appears to mean
  // the negated form is strictly cheaper — confirm against the helper's
  // contract, which is defined elsewhere in this file.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd A, 0) -> A
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N0;

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isa<ConstantFPSDNode>(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     N0.getOperand(1), N1));

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP01, 0),
                                       DAG.getConstantFP(1.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP01, 0),
                                       DAG.getConstantFP(2.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N0.getOperand(0), NewCFP);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP11, 0),
                                       DAG.getConstantFP(1.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP11, 0),
                                       DAG.getConstantFP(2.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1))
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N1, DAG.getConstantFP(3.0, VT));
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0)
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N0, DAG.getConstantFP(3.0, VT));
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0.getOperand(0), DAG.getConstantFP(4.0, VT));
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
    if (N0.getOpcode() == ISD::FMUL &&
        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);

    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FMUL &&
        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N1.getOperand(0), N1.getOperand(1), N0);

    // When FP_EXTEND nodes are free on the target, and there is an opportunity
    // to combine into FMA, arrange such nodes accordingly.
    if (TLI.isFPExtFree(VT)) {

      // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(1)), N1);
      }

      // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
      // Note: Commutes FADD operands.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(1)), N0);
      }
    }

    // More folding opportunities when target permits.
    if (TLI.enableAggressiveFMAFusion(VT)) {

      // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
      if (N0.getOpcode() == ISD::FMA &&
          N0.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N0.getOperand(2).getOperand(0),
                                       N0.getOperand(2).getOperand(1),
                                       N1));

      // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
      if (N1->getOpcode() == ISD::FMA &&
          N1.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N1.getOperand(0), N1.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N1.getOperand(2).getOperand(0),
                                       N1.getOperand(2).getOperand(1),
                                       N0));
    }
  }

  return SDValue();
}

/// Combine an ISD::FSUB node: constant folding, fneg rewrites,
/// unsafe-math simplifications, and FSUB->FMA fusion.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, dl, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N0;

    // (fsub 0, B) -> -B
    if (N0CFP && N0CFP->getValueAPF().isZero()) {
      // Prefer a free negation of B; otherwise emit an explicit FNEG if legal.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, dl, VT, N1);
    }

    // (fsub x, x) -> 0.0
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (N0.getOpcode() == ISD::FMUL &&
        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, dl, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(ISD::FNEG, dl, VT, N1));

    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FMUL &&
        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1), N0);

    // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
    if (N0.getOpcode() == ISD::FNEG &&
        N0.getOperand(0).getOpcode() == ISD::FMUL &&
        ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
         TLI.enableAggressiveFMAFusion(VT))) {
      SDValue N00 = N0.getOperand(0).getOperand(0);
      SDValue N01 = N0.getOperand(0).getOperand(1);
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
                         DAG.getNode(ISD::FNEG, dl, VT, N1));
    }

    // When FP_EXTEND nodes are free on the target, and there is an opportunity
    // to combine into FMA, arrange such nodes accordingly.
    if (TLI.isFPExtFree(VT)) {

      // fold (fsub (fpext (fmul x, y)), z)
      //   -> (fma (fpext x), (fpext y), (fneg z))
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(1)),
                             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
      }

      // fold (fsub x, (fpext (fmul y, z)))
      //   -> (fma (fneg (fpext y)), (fpext z), x)
      // Note: Commutes FSUB operands.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                         DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                     VT, N10.getOperand(0))),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(1)),
                             N0);
      }

      // fold (fsub (fpext (fneg (fmul, x, y))), z)
      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FNEG) {
          SDValue N000 = N00.getOperand(0);
          if (N000.getOpcode() == ISD::FMUL) {
            return DAG.getNode(ISD::FMA, dl, VT,
                               DAG.getNode(ISD::FNEG, dl, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                       VT, N000.getOperand(0))),
                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                           N000.getOperand(1)),
                               DAG.getNode(ISD::FNEG, dl, VT, N1));
          }
        }
      }

      // fold (fsub (fneg (fpext (fmul, x, y))), z)
      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
      // Same as above with the fneg/fpext nesting swapped.
      if (N0.getOpcode() == ISD::FNEG) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FP_EXTEND) {
          SDValue N000 = N00.getOperand(0);
          if (N000.getOpcode() == ISD::FMUL) {
            return DAG.getNode(ISD::FMA, dl, VT,
                               DAG.getNode(ISD::FNEG, dl, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                       VT, N000.getOperand(0))),
                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                           N000.getOperand(1)),
                               DAG.getNode(ISD::FNEG, dl, VT, N1));
          }
        }
      }
    }

    // More folding opportunities when target permits.
    if (TLI.enableAggressiveFMAFusion(VT)) {

      // fold (fsub (fma x, y, (fmul u, v)), z)
      //   -> (fma x, y, (fma u, v, (fneg z)))
      if (N0.getOpcode() == ISD::FMA &&
          N0.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N0.getOperand(2).getOperand(0),
                                       N0.getOperand(2).getOperand(1),
                                       DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                                   N1)));

      // fold (fsub x, (fma y, z, (fmul u, v)))
      //   -> (fma (fneg y), z, (fma (fneg u), v, x))
      if (N1.getOpcode() == ISD::FMA &&
          N1.getOperand(2).getOpcode() == ISD::FMUL) {
        SDValue N20 = N1.getOperand(2).getOperand(0);
        SDValue N21 = N1.getOperand(2).getOperand(1);
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                       N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                                   N20),
                                       N21, N0));
      }
    }
  }

  return SDValue();
}

/// Combine an ISD::FMUL node: constant folding, canonicalization,
/// identity/negation folds, and unsafe-math reassociation.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode())
      return FoldedVOp;
    // Canonicalize vector constant to RHS.
    if (N0.getOpcode() == ISD::BUILD_VECTOR &&
        N1.getOpcode() != ISD::BUILD_VECTOR)
      if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
        if (BV0->isConstant())
          return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    // (Unsafe: ignores NaN/Inf inputs and signed zero.)
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
      if ((N1CFP && isConstOrConstSplatFP(N01)) ||
          (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
        SDLoc SL(N);
        SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
        return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
      SDLoc SL(N);
      const SDValue Two = DAG.getConstantFP(2.0, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

/// Combine an ISD::FMA node.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // Constant fold FMA.
  // When all three operands are constants, getNode constant-folds the FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
  }

  if (Options.UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z; ignores NaN/Inf and the
    // sign of zero, hence unsafe-math only.
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1.0, y, z) -> (fadd y, z); (fma x, 1.0, z) -> (fadd x, z)
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FMUL &&
      N0 == N2.getOperand(0) &&
      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
  }

  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
  if (Options.UnsafeFPMath &&
      N0.getOpcode() == ISD::FMUL && N1CFP &&
      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMA, dl, VT,
                       N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
                       N2);
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
    }
  }

  // (fma x, c, x) -> (fmul x, (c+1))
  if (Options.UnsafeFPMath && N1CFP && N0 == N2)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(1.0, VT)));

  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(-1.0, VT)));

  return SDValue();
}

/// Visit an FDIV node. Constant-folds, and under unsafe-FP-math rewrites
/// division into multiplication: by a constant reciprocal, by a
/// target-provided rsqrt/reciprocal estimate, and by sharing one reciprocal
/// across several divisions with the same divisor.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      APFloat N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
                           DAG.getConstantFP(Recip, VT));
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Divisor is fpext(sqrt(z)): build the estimate in the narrow type and
      // extend the result.
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Divisor is fpround(sqrt(z)): same idea, rounding the estimate and
      // preserving the original round's truncation flag (operand 1).
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        //   x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal.
  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
  // Notice that this is not always beneficial. One reason is different target
  // may have different costs for FDIV and FMUL, so sometimes the cost of two
  // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
  // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
  if (Options.UnsafeFPMath) {
    // Skip if current node is a reciprocal.
    if (N0CFP && N0CFP->isExactlyValue(1.0))
      return SDValue();

    SmallVector<SDNode *, 4> Users;
    // Find all FDIV users of the same divisor. Note that N itself qualifies
    // and therefore ends up in Users as well.
    for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
                              UE = N1.getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = UI.getUse().getUser();
      if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
        Users.push_back(User);
    }

    if (TLI.combineRepeatedFPDivisors(Users.size())) {
      SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
      SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);

      // Dividend / Divisor -> Dividend * Reciprocal
      // The FPOne check skips the Reciprocal node itself if it was CSE'd into
      // an existing user.
      // NOTE(review): ReplaceAllUsesWith is invoked inside the loop over the
      // snapshotted Users list (which includes N); presumably safe because the
      // list was collected up front, but confirm against use-list mutation.
      for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
        if ((*I)->getOperand(0) != FPOne) {
          SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
                                        (*I)->getOperand(0), Reciprocal);
          DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
        }
      }
      return SDValue();
    }
  }

  return SDValue();
}

/// Visit an FREM node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);

  return SDValue();
}

/// Visit an FSQRT node. Under unsafe-FP-math, expand sqrt(X) into
/// X * rsqrt(X) using the target's reciprocal-sqrt estimate, with a select
/// to force the X == 0 case back to 0 (the estimate form would yield NaN).
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  if (DAG.getTarget().Options.UnsafeFPMath &&
      !TLI.isFsqrtCheap()) {
    // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
    if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
      EVT VT = RV.getValueType();
      RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
      AddToWorklist(RV.getNode());

      // Unfortunately, RV is now NaN if the input was exactly 0.
      // Select out this case and force the answer to 0.
      SDValue Zero = DAG.getConstantFP(0.0, VT);
      SDValue ZeroCmp =
          DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
                       N->getOperand(0), Zero, ISD::SETEQ);
      AddToWorklist(ZeroCmp.getNode());
      // NOTE(review): RV was already added to the worklist above; this second
      // add appears redundant (presumably AddToWorklist de-duplicates).
      AddToWorklist(RV.getNode());

      RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
                       SDLoc(N), VT, ZeroCmp, Zero, RV);
      return RV;
    }
  }
  return SDValue();
}

/// Visit an FCOPYSIGN node: constant-fold, turn a constant sign operand into
/// FABS / FNEG(FABS), and look through sign-irrelevant wrappers on either
/// operand.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  if (N1CFP) {
    const APFloat& V = N1CFP->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  // The magnitude operand's own sign is irrelevant, so strip sign-changing
  // wrappers from it.
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  // Extending/rounding preserves the sign, so take it from the source.
  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       N0, N1.getOperand(0));

  return SDValue();
}

/// Visit a SINT_TO_FP node: constant-fold, switch to UINT_TO_FP when that is
/// the legal form and the sign bit is known zero, and turn conversions of
/// setcc results directly into select_cc of FP constants.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (N0C &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
    // A signed i1 setcc result sign-extends to -1 or 0.
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
    // A zero-extended setcc result is 1 or 0.
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }
  }

  return SDValue();
}

/// Visit a UINT_TO_FP node: constant-fold, switch to SINT_TO_FP when that is
/// the legal form and the sign bit is known zero, and turn conversion of a
/// setcc result into a select_cc of FP constants.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (N0C &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)

    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }
  }

  return SDValue();
}

/// Visit an FP_TO_SINT node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return SDValue();
}

/// Visit an FP_TO_UINT node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return SDValue();
}

/// Visit an FP_ROUND node. Operand 1 is the "truncating" flag: 1 means the
/// round loses no information. Folds round-of-extend, merges nested rounds,
/// and hoists the round through FCOPYSIGN.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // This is a value preserving truncation if both round's are.
    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
                   N0.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
                       DAG.getIntPtrConstant(IsTrunc));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// Visit an FP_ROUND_INREG node; constant-folds by materializing the constant
/// in the in-register type and extending it back.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE(review): this local is literally named "EVT" and shadows the EVT
  // type name for the remainder of the function.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
  }

  return SDValue();
}

/// Visit an FP_EXTEND node: constant-fold, cancel against a truncating
/// FP_ROUND, and convert fpext(load) into an extending load.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other users of the original (narrow) load get a truncating round of the
    // extending load's value, and its chain users get the new chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}

/// Visit an FCEIL node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

/// Visit an FTRUNC node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  return SDValue();
}

/// Visit an FFLOOR node; the only fold is constant folding via getNode.
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}

// FIXME: FNEG and FABS have a lot in common; refactor.
/// Visit an FNEG node: constant-fold, push the negation into a freely
/// negatible operand, and rewrite fneg(bitcast x) as an integer sign-bit XOR.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // Constant fold FNEG.
  if (isa<ConstantFPSDNode>(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      // Flip only the sign bit(s) in the integer domain.
      Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
                        DAG.getConstant(SignMask, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only when the negated constant is
      // itself legal (as an immediate or as a legal ConstantFP node).
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
    }
  }

  return SDValue();
}

/// Visit an FMINNUM node: constant-fold via APFloat minnum, and canonicalize
/// a constant LHS to the RHS.
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Visit an FMAXNUM node: constant-fold via APFloat maxnum, and canonicalize
/// a constant LHS to the RHS.
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Visit an FABS node: constant-fold, collapse nested sign-manipulating
/// nodes, and rewrite fabs(bitcast x) as an integer sign-bit AND-clear.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fabs c1) -> fabs(c1)
  if (isa<ConstantFPSDNode>(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
      }
      // Clear only the sign bit(s) in the integer domain.
      Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
                        DAG.getConstant(SignMask, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
    }
  }

  return SDValue();
}

/// Visit a BRCOND node (operands: chain, condition, destination block).
/// Tries to fold the condition into BR_CC, to replace an AND+SRL bit test
/// with a SETCC, and to turn an XOR condition into an equality SETCC.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look pass the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDValue SetCC =
            DAG.getSetCC(SDLoc(N),
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);  // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      SDValue Tmp = visitXOR(TheXor);
      if (Tmp.getNode()) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // xor(xor(x,y), 1) with one use: invert the comparison sense.
      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
            Op0.getOpcode() == ISD::XOR) {
          TheXor = Op0.getNode();
          Equal = true;
        }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
/// Visit a BR_CC node: simplify its embedded comparison with SimplifySetCC
/// and, when the result is still a SETCC, rebuild the BR_CC from it.
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}

/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
8192static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 8193 SelectionDAG &DAG, 8194 const TargetLowering &TLI) { 8195 EVT VT; 8196 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 8197 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 8198 return false; 8199 VT = Use->getValueType(0); 8200 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 8201 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 8202 return false; 8203 VT = ST->getValue().getValueType(); 8204 } else 8205 return false; 8206 8207 TargetLowering::AddrMode AM; 8208 if (N->getOpcode() == ISD::ADD) { 8209 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8210 if (Offset) 8211 // [reg +/- imm] 8212 AM.BaseOffs = Offset->getSExtValue(); 8213 else 8214 // [reg +/- reg] 8215 AM.Scale = 1; 8216 } else if (N->getOpcode() == ISD::SUB) { 8217 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8218 if (Offset) 8219 // [reg +/- imm] 8220 AM.BaseOffs = -Offset->getSExtValue(); 8221 else 8222 // [reg +/- reg] 8223 AM.Scale = 1; 8224 } else 8225 return false; 8226 8227 return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); 8228} 8229 8230/// Try turning a load/store into a pre-indexed load/store when the base 8231/// pointer is an add or subtract and it has other uses besides the load/store. 8232/// After the transformation, the new indexed load/store has effectively folded 8233/// the add/subtract in and all of its other uses are redirected to the 8234/// new load/store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  // Identify N as an unindexed load or store for which the target has a
  // legal pre-inc/pre-dec form, and grab its base pointer.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // NOTE: the swap is undone (twice) further down so that OtherUses
  // bookkeeping and the final rewrite see a consistent BasePtr/Offset.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1. Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode *Use : BasePtr.getNode()->uses()) {
      if (Use == Ptr.getNode())
        continue;

      if (Use->isPredecessorOf(N))
        continue;

      // Any non-add/sub use makes the whole rewrite of other uses unsafe;
      // give up on all of them.
      if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
      if (Op1.getNode() == BasePtr.getNode())
        std::swap(Op0, Op1);
      assert(Op0.getNode() == BasePtr.getNode() &&
             "Use of ADD/SUB but not an operand");

      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use);
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    APInt Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ?
1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 SDLoc(OtherUses[i]),
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}

/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  // Identify N as an unindexed load or store for which the target has a
  // legal post-inc/post-dec form, and grab its base pointer.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look through the other uses of the pointer for an ADD/SUB that can be
  // folded in as the post-increment/decrement of this access.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// \brief Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
                          ConstInc->getValueType(0));
  }

  // Pre/post-inc become ADD; pre/post-dec become SUB.
  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

/// Combine a LOAD node: delete dead loads, forward stored values, improve
/// alignment, re-chain through alias analysis, form indexed loads, and
/// slice the load into narrower loads where profitable.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad =
            DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                           LD->getValueType(0),
                           Chain, Ptr, LD->getPointerInfo(),
                           LD->getMemoryVT(),
                           LD->isVolatile(), LD->isNonTemporal(),
                           LD->isInvariant(), Align, LD->getAAInfo());
        return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // Honor the -combiner-alias-analysis override when present; otherwise ask
  // the subtarget whether it wants AA-driven chain adjustment.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various cost.
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    /// A slice always costs one load; it additionally costs a zext when the
    /// loaded type must be widened to the truncate's type and the target
    /// does not consider that zext free.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
                              LS.Inst->getOperand(0).getValueType()))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  LoadedSlice(const LoadedSlice &LS)
      : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // Derive the alignment implied by accessing Offset bytes past an
    // Alignment-aligned address.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (Origin->getOffset().getOpcode() != ISD::UNDEF)
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian =
        DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the byte offset counts from the other end.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
                              DAG->getConstant(Offset, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst = DAG->getLoad(
        SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
        Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
        Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
}

/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// \brief Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
9074static bool areSlicesNextToEachOther(const LoadedSlice &First, 9075 const LoadedSlice &Second) { 9076 assert(First.Origin == Second.Origin && First.Origin && 9077 "Unable to match different memory origins."); 9078 APInt UsedBits = First.getUsedBits(); 9079 assert((UsedBits & Second.getUsedBits()) == 0 && 9080 "Slices are not supposed to overlap."); 9081 UsedBits |= Second.getUsedBits(); 9082 return areUsedBitsDense(UsedBits); 9083} 9084 9085/// \brief Adjust the \p GlobalLSCost according to the target 9086/// paring capabilities and the layout of the slices. 9087/// \pre \p GlobalLSCost should account for at least as many loads as 9088/// there is in the slices in \p LoadedSlices. 9089static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, 9090 LoadedSlice::Cost &GlobalLSCost) { 9091 unsigned NumberOfSlices = LoadedSlices.size(); 9092 // If there is less than 2 elements, no pairing is possible. 9093 if (NumberOfSlices < 2) 9094 return; 9095 9096 // Sort the slices so that elements that are likely to be next to each 9097 // other in memory are next to each other in the list. 9098 std::sort(LoadedSlices.begin(), LoadedSlices.end(), 9099 [](const LoadedSlice &LHS, const LoadedSlice &RHS) { 9100 assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); 9101 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); 9102 }); 9103 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); 9104 // First (resp. Second) is the first (resp. Second) potentially candidate 9105 // to be placed in a paired load. 9106 const LoadedSlice *First = nullptr; 9107 const LoadedSlice *Second = nullptr; 9108 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, 9109 // Set the beginning of the pair. 9110 First = Second) { 9111 9112 Second = &LoadedSlices[CurrSlice]; 9113 9114 // If First is NULL, it means we start a new pair. 9115 // Get to the next slice. 
9116 if (!First) 9117 continue; 9118 9119 EVT LoadedType = First->getLoadedType(); 9120 9121 // If the types of the slices are different, we cannot pair them. 9122 if (LoadedType != Second->getLoadedType()) 9123 continue; 9124 9125 // Check if the target supplies paired loads for this type. 9126 unsigned RequiredAlignment = 0; 9127 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { 9128 // move to the next pair, this type is hopeless. 9129 Second = nullptr; 9130 continue; 9131 } 9132 // Check if we meet the alignment requirement. 9133 if (RequiredAlignment > First->getAlignment()) 9134 continue; 9135 9136 // Check that both loads are next to each other in memory. 9137 if (!areSlicesNextToEachOther(*First, *Second)) 9138 continue; 9139 9140 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); 9141 --GlobalLSCost.Loads; 9142 // Move to the next pair. 9143 Second = nullptr; 9144 } 9145} 9146 9147/// \brief Check the profitability of all involved LoadedSlice. 9148/// Currently, it is considered profitable if there is exactly two 9149/// involved slices (1) which are (2) next to each other in memory, and 9150/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). 9151/// 9152/// Note: The order of the elements in \p LoadedSlices may be modified, but not 9153/// the elements themselves. 9154/// 9155/// FIXME: When the cost model will be mature enough, we can relax 9156/// constraints (1) and (2). 9157static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, 9158 const APInt &UsedBits, bool ForCodeSize) { 9159 unsigned NumberOfSlices = LoadedSlices.size(); 9160 if (StressLoadSlicing) 9161 return NumberOfSlices > 1; 9162 9163 // Check (1). 9164 if (NumberOfSlices != 2) 9165 return false; 9166 9167 // Check (2). 9168 if (!areUsedBitsDense(UsedBits)) 9169 return false; 9170 9171 // Check (3). 
9172 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); 9173 // The original code has one big load. 9174 OrigCost.Loads = 1; 9175 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { 9176 const LoadedSlice &LS = LoadedSlices[CurrSlice]; 9177 // Accumulate the cost of all the slices. 9178 LoadedSlice::Cost SliceCost(LS, ForCodeSize); 9179 GlobalSlicingCost += SliceCost; 9180 9181 // Account as cost in the original configuration the gain obtained 9182 // with the current slices. 9183 OrigCost.addSliceGain(LS); 9184 } 9185 9186 // If the target supports paired load, adjust the cost accordingly. 9187 adjustCostForPairing(LoadedSlices, GlobalSlicingCost); 9188 return OrigCost > GlobalSlicingCost; 9189} 9190 9191/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) 9192/// operations, split it in the various pieces being extracted. 9193/// 9194/// This sort of thing is introduced by SROA. 9195/// This slicing takes care not to insert overlapping loads. 9196/// \pre LI is a simple load (i.e., not an atomic or volatile load). 9197bool DAGCombiner::SliceUpLoad(SDNode *N) { 9198 if (Level < AfterLegalizeDAG) 9199 return false; 9200 9201 LoadSDNode *LD = cast<LoadSDNode>(N); 9202 if (LD->isVolatile() || !ISD::isNormalLoad(LD) || 9203 !LD->getValueType(0).isInteger()) 9204 return false; 9205 9206 // Keep track of already used bits to detect overlapping values. 9207 // In that case, we will just abort the transformation. 9208 APInt UsedBits(LD->getValueSizeInBits(0), 0); 9209 9210 SmallVector<LoadedSlice, 4> LoadedSlices; 9211 9212 // Check if this load is used as several smaller chunks of bits. 9213 // Basically, look for uses in trunc or trunc(lshr) and record a new chain 9214 // of computation for each trunc. 9215 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 9216 UI != UIEnd; ++UI) { 9217 // Skip the uses of the chain. 
9218 if (UI.getUse().getResNo() != 0) 9219 continue; 9220 9221 SDNode *User = *UI; 9222 unsigned Shift = 0; 9223 9224 // Check if this is a trunc(lshr). 9225 if (User->getOpcode() == ISD::SRL && User->hasOneUse() && 9226 isa<ConstantSDNode>(User->getOperand(1))) { 9227 Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); 9228 User = *User->use_begin(); 9229 } 9230 9231 // At this point, User is a Truncate, iff we encountered, trunc or 9232 // trunc(lshr). 9233 if (User->getOpcode() != ISD::TRUNCATE) 9234 return false; 9235 9236 // The width of the type must be a power of 2 and greater than 8-bits. 9237 // Otherwise the load cannot be represented in LLVM IR. 9238 // Moreover, if we shifted with a non-8-bits multiple, the slice 9239 // will be across several bytes. We do not support that. 9240 unsigned Width = User->getValueSizeInBits(0); 9241 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) 9242 return 0; 9243 9244 // Build the slice for this chain of computations. 9245 LoadedSlice LS(User, LD, Shift, &DAG); 9246 APInt CurrentUsedBits = LS.getUsedBits(); 9247 9248 // Check if this slice overlaps with another. 9249 if ((CurrentUsedBits & UsedBits) != 0) 9250 return false; 9251 // Update the bits used globally. 9252 UsedBits |= CurrentUsedBits; 9253 9254 // Check if the new slice would be legal. 9255 if (!LS.isLegal()) 9256 return false; 9257 9258 // Record the slice. 9259 LoadedSlices.push_back(LS); 9260 } 9261 9262 // Abort slicing if it does not seem to be profitable. 9263 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) 9264 return false; 9265 9266 ++SlicedLoads; 9267 9268 // Rewrite each chain to use an independent load. 9269 // By construction, each chain can be represented by a unique load. 9270 9271 // Prepare the argument for the new token factor for all the slices. 
9272 SmallVector<SDValue, 8> ArgChains; 9273 for (SmallVectorImpl<LoadedSlice>::const_iterator 9274 LSIt = LoadedSlices.begin(), 9275 LSItEnd = LoadedSlices.end(); 9276 LSIt != LSItEnd; ++LSIt) { 9277 SDValue SliceInst = LSIt->loadSlice(); 9278 CombineTo(LSIt->Inst, SliceInst, true); 9279 if (SliceInst.getNode()->getOpcode() != ISD::LOAD) 9280 SliceInst = SliceInst.getOperand(0); 9281 assert(SliceInst->getOpcode() == ISD::LOAD && 9282 "It takes more than a zext to get to the loaded slice!!"); 9283 ArgChains.push_back(SliceInst.getValue(1)); 9284 } 9285 9286 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, 9287 ArgChains); 9288 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 9289 return true; 9290} 9291 9292/// Check to see if V is (and load (ptr), imm), where the load is having 9293/// specific bytes cleared out. If so, return the byte size being masked out 9294/// and the shift amount. 9295static std::pair<unsigned, unsigned> 9296CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { 9297 std::pair<unsigned, unsigned> Result(0, 0); 9298 9299 // Check for the structure we're looking for. 9300 if (V->getOpcode() != ISD::AND || 9301 !isa<ConstantSDNode>(V->getOperand(1)) || 9302 !ISD::isNormalLoad(V->getOperand(0).getNode())) 9303 return Result; 9304 9305 // Check the chain and pointer. 9306 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); 9307 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. 9308 9309 // The store should be chained directly to the load or be an operand of a 9310 // tokenfactor. 9311 if (LD == Chain.getNode()) 9312 ; // ok. 9313 else if (Chain->getOpcode() != ISD::TokenFactor) 9314 return Result; // Fail. 9315 else { 9316 bool isOk = false; 9317 for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) 9318 if (Chain->getOperand(i).getNode() == LD) { 9319 isOk = true; 9320 break; 9321 } 9322 if (!isOk) return Result; 9323 } 9324 9325 // This only handles simple types. 
9326 if (V.getValueType() != MVT::i16 && 9327 V.getValueType() != MVT::i32 && 9328 V.getValueType() != MVT::i64) 9329 return Result; 9330 9331 // Check the constant mask. Invert it so that the bits being masked out are 9332 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits 9333 // follow the sign bit for uniformity. 9334 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); 9335 unsigned NotMaskLZ = countLeadingZeros(NotMask); 9336 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. 9337 unsigned NotMaskTZ = countTrailingZeros(NotMask); 9338 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. 9339 if (NotMaskLZ == 64) return Result; // All zero mask. 9340 9341 // See if we have a continuous run of bits. If so, we have 0*1+0* 9342 if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) 9343 return Result; 9344 9345 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. 9346 if (V.getValueType() != MVT::i64 && NotMaskLZ) 9347 NotMaskLZ -= 64-V.getValueSizeInBits(); 9348 9349 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; 9350 switch (MaskedBytes) { 9351 case 1: 9352 case 2: 9353 case 4: break; 9354 default: return Result; // All one mask, or 5-byte mask. 9355 } 9356 9357 // Verify that the first bit starts at a multiple of mask so that the access 9358 // is aligned the same as the access width. 9359 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; 9360 9361 Result.first = MaskedBytes; 9362 Result.second = NotMaskTZ/8; 9363 return Result; 9364} 9365 9366 9367/// Check to see if IVal is something that provides a value as specified by 9368/// MaskInfo. If so, replace the specified store with a narrower store of 9369/// truncated IVal. 
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  // MaskInfo: (number of bytes being masked, byte shift of the mask),
  // as produced by CheckForMaskedLoad.
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift)
    IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8,
                                    DC->getShiftAmountTy(IVal.getValueType())));

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little-endian the masked bytes sit ByteShift bytes from the start of
  // the value; on big-endian they are mirrored from the other end.
  if (DAG.getTargetLoweringInfo().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}


/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // Normalize AND to the "bits touched" form (invert the mask) so that the
    // trailing/leading-zero math below works uniformly for OR/XOR/AND.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
             TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only safe when every touched bit fits inside the narrowed window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the AND normalization done above.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (TLI.isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
9553SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 9554 StoreSDNode *ST = cast<StoreSDNode>(N); 9555 SDValue Chain = ST->getChain(); 9556 SDValue Value = ST->getValue(); 9557 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 9558 Value.hasOneUse() && 9559 Chain == SDValue(Value.getNode(), 1)) { 9560 LoadSDNode *LD = cast<LoadSDNode>(Value); 9561 EVT VT = LD->getMemoryVT(); 9562 if (!VT.isFloatingPoint() || 9563 VT != ST->getMemoryVT() || 9564 LD->isNonTemporal() || 9565 ST->isNonTemporal() || 9566 LD->getPointerInfo().getAddrSpace() != 0 || 9567 ST->getPointerInfo().getAddrSpace() != 0) 9568 return SDValue(); 9569 9570 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 9571 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 9572 !TLI.isOperationLegal(ISD::STORE, IntVT) || 9573 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 9574 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 9575 return SDValue(); 9576 9577 unsigned LDAlign = LD->getAlignment(); 9578 unsigned STAlign = ST->getAlignment(); 9579 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 9580 unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); 9581 if (LDAlign < ABIAlign || STAlign < ABIAlign) 9582 return SDValue(); 9583 9584 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), 9585 LD->getChain(), LD->getBasePtr(), 9586 LD->getPointerInfo(), 9587 false, false, false, LDAlign); 9588 9589 SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), 9590 NewLD, ST->getBasePtr(), 9591 ST->getPointerInfo(), 9592 false, false, STAlign); 9593 9594 AddToWorklist(NewLD.getNode()); 9595 AddToWorklist(NewST.getNode()); 9596 WorklistRemover DeadNodes(*this); 9597 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 9598 ++LdStFP2Int; 9599 return NewST; 9600 } 9601 9602 return SDValue(); 9603} 9604 9605/// Helper struct to parse and store a memory address as base + index + offset. 
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
///  (load (i64 add (i64 copyfromreg %c)
///                 (i64 signextend (add (i8 load %index)
///                                      (i8 1))))
/// vs
///
///  (load (i64 add (i64 copyfromreg %c)
///                 (i64 signextend (i32 add (i32 signextend (i8 load %index))
///                                          (i32 1)))))
struct BaseIndexOffset {
  SDValue Base;              // The base pointer of the address.
  SDValue Index;             // Optional variable index (null if none).
  int64_t Offset;            // Constant byte offset.
  bool IsIndexSignExt;       // True if Index was behind a sign extension.

  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}

  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                  bool IsIndexSignExt) :
    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}

  /// Two addresses match if base, index, and sign-extension flag agree;
  /// the constant Offset is deliberately not compared.
  bool equalBaseIndex(const BaseIndexOffset &Other) {
    return Other.Base == Base && Other.Index == Index &&
           Other.IsIndexSignExt == IsIndexSignExt;
  }

  /// Parses tree in Ptr for base, index, offset addresses.
  static BaseIndexOffset match(SDValue Ptr) {
    bool IsIndexSignExt = false;

    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
    // instruction, then it could be just the BASE or everything else we don't
    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // We know that we have at least an ADD instruction. Try to pattern match
    // the simple case of BASE + OFFSET.
    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
      return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                             IsIndexSignExt);
    }

    // Inside a loop the current BASE pointer is calculated using an ADD and a
    // MUL instruction. In this case Ptr is the actual BASE pointer.
    // (i64 add (i64 %array_ptr)
    //          (i64 mul (i64 %induction_var)
    //                   (i64 %element_size)))
    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Look at Base + Index + Offset cases.
    SDValue Base = Ptr->getOperand(0);
    SDValue IndexOffset = Ptr->getOperand(1);

    // Skip signextends.
    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
      IndexOffset = IndexOffset->getOperand(0);
      IsIndexSignExt = true;
    }

    // Either the case of Base + Index (no offset) or something else.
    if (IndexOffset->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);

    // Now we have the case of Base + Index + offset.
    SDValue Index = IndexOffset->getOperand(0);
    SDValue Offset = IndexOffset->getOperand(1);

    // The inner add must carry a constant offset; otherwise give up and
    // treat the whole Ptr as the base.
    if (!isa<ConstantSDNode>(Offset))
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Ignore signextends.
    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
      Index = Index->getOperand(0);
      IsIndexSignExt = true;
    } else IsIndexSignExt = false;

    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  }
};

/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
    MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
  // Ptr to the mem node.
  LSBaseSDNode *MemNode;
  // Offset from the base ptr.
  int64_t OffsetFromBase;
  // What is the sequence number of this mem node.
  // Lowest mem operand in the DAG starts at zero.
9704 unsigned SequenceNum; 9705}; 9706 9707bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { 9708 EVT MemVT = St->getMemoryVT(); 9709 int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; 9710 bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). 9711 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); 9712 9713 // Don't merge vectors into wider inputs. 9714 if (MemVT.isVector() || !MemVT.isSimple()) 9715 return false; 9716 9717 // Perform an early exit check. Do not bother looking at stored values that 9718 // are not constants or loads. 9719 SDValue StoredVal = St->getValue(); 9720 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 9721 if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && 9722 !IsLoadSrc) 9723 return false; 9724 9725 // Only look at ends of store sequences. 9726 SDValue Chain = SDValue(St, 0); 9727 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) 9728 return false; 9729 9730 // This holds the base pointer, index, and the offset in bytes from the base 9731 // pointer. 9732 BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); 9733 9734 // We must have a base and an offset. 9735 if (!BasePtr.Base.getNode()) 9736 return false; 9737 9738 // Do not handle stores to undef base pointers. 9739 if (BasePtr.Base.getOpcode() == ISD::UNDEF) 9740 return false; 9741 9742 // Save the LoadSDNodes that we find in the chain. 9743 // We need to make sure that these nodes do not interfere with 9744 // any of the store nodes. 9745 SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; 9746 9747 // Save the StoreSDNodes that we find in the chain. 9748 SmallVector<MemOpLink, 8> StoreNodes; 9749 9750 // Walk up the chain and look for nodes with offsets from the same 9751 // base pointer. Stop when reaching an instruction with a different kind 9752 // or instruction which has a different base pointer. 
9753 unsigned Seq = 0; 9754 StoreSDNode *Index = St; 9755 while (Index) { 9756 // If the chain has more than one use, then we can't reorder the mem ops. 9757 if (Index != St && !SDValue(Index, 0)->hasOneUse()) 9758 break; 9759 9760 // Find the base pointer and offset for this memory node. 9761 BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); 9762 9763 // Check that the base pointer is the same as the original one. 9764 if (!Ptr.equalBaseIndex(BasePtr)) 9765 break; 9766 9767 // Check that the alignment is the same. 9768 if (Index->getAlignment() != St->getAlignment()) 9769 break; 9770 9771 // The memory operands must not be volatile. 9772 if (Index->isVolatile() || Index->isIndexed()) 9773 break; 9774 9775 // No truncation. 9776 if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) 9777 if (St->isTruncatingStore()) 9778 break; 9779 9780 // The stored memory type must be the same. 9781 if (Index->getMemoryVT() != MemVT) 9782 break; 9783 9784 // We do not allow unaligned stores because we want to prevent overriding 9785 // stores. 9786 if (Index->getAlignment()*8 != MemVT.getSizeInBits()) 9787 break; 9788 9789 // We found a potential memory operand to merge. 9790 StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); 9791 9792 // Find the next memory operand in the chain. If the next operand in the 9793 // chain is a store then move up and continue the scan with the next 9794 // memory operand. If the next operand is a load save it and use alias 9795 // information to check if it interferes with anything. 9796 SDNode *NextInChain = Index->getChain().getNode(); 9797 while (1) { 9798 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { 9799 // We found a store node. Use it for the next iteration. 9800 Index = STn; 9801 break; 9802 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { 9803 if (Ldn->isVolatile()) { 9804 Index = nullptr; 9805 break; 9806 } 9807 9808 // Save the load node for later. Continue the scan. 
9809 AliasLoadNodes.push_back(Ldn); 9810 NextInChain = Ldn->getChain().getNode(); 9811 continue; 9812 } else { 9813 Index = nullptr; 9814 break; 9815 } 9816 } 9817 } 9818 9819 // Check if there is anything to merge. 9820 if (StoreNodes.size() < 2) 9821 return false; 9822 9823 // Sort the memory operands according to their distance from the base pointer. 9824 std::sort(StoreNodes.begin(), StoreNodes.end(), 9825 [](MemOpLink LHS, MemOpLink RHS) { 9826 return LHS.OffsetFromBase < RHS.OffsetFromBase || 9827 (LHS.OffsetFromBase == RHS.OffsetFromBase && 9828 LHS.SequenceNum > RHS.SequenceNum); 9829 }); 9830 9831 // Scan the memory operations on the chain and find the first non-consecutive 9832 // store memory address. 9833 unsigned LastConsecutiveStore = 0; 9834 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 9835 for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { 9836 9837 // Check that the addresses are consecutive starting from the second 9838 // element in the list of stores. 9839 if (i > 0) { 9840 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 9841 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 9842 break; 9843 } 9844 9845 bool Alias = false; 9846 // Check if this store interferes with any of the loads that we found. 9847 for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) 9848 if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { 9849 Alias = true; 9850 break; 9851 } 9852 // We found a load that alias with this store. Stop the sequence. 9853 if (Alias) 9854 break; 9855 9856 // Mark this node as useful. 9857 LastConsecutiveStore = i; 9858 } 9859 9860 // The node with the lowest store address. 9861 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 9862 9863 // Store the constants into memory as one consecutive store. 
9864 if (!IsLoadSrc) { 9865 unsigned LastLegalType = 0; 9866 unsigned LastLegalVectorType = 0; 9867 bool NonZero = false; 9868 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 9869 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9870 SDValue StoredVal = St->getValue(); 9871 9872 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { 9873 NonZero |= !C->isNullValue(); 9874 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { 9875 NonZero |= !C->getConstantFPValue()->isNullValue(); 9876 } else { 9877 // Non-constant. 9878 break; 9879 } 9880 9881 // Find a legal type for the constant store. 9882 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 9883 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9884 if (TLI.isTypeLegal(StoreTy)) 9885 LastLegalType = i+1; 9886 // Or check whether a truncstore is legal. 9887 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 9888 TargetLowering::TypePromoteInteger) { 9889 EVT LegalizedStoredValueTy = 9890 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); 9891 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) 9892 LastLegalType = i+1; 9893 } 9894 9895 // Find a legal type for the vector store. 9896 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 9897 if (TLI.isTypeLegal(Ty)) 9898 LastLegalVectorType = i + 1; 9899 } 9900 9901 // We only use vectors if the constant is known to be zero and the 9902 // function is not marked with the noimplicitfloat attribute. 9903 if (NonZero || NoVectors) 9904 LastLegalVectorType = 0; 9905 9906 // Check if we found a legal integer type to store. 9907 if (LastLegalType == 0 && LastLegalVectorType == 0) 9908 return false; 9909 9910 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; 9911 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; 9912 9913 // Make sure we have something to merge. 
9914 if (NumElem < 2) 9915 return false; 9916 9917 unsigned EarliestNodeUsed = 0; 9918 for (unsigned i=0; i < NumElem; ++i) { 9919 // Find a chain for the new wide-store operand. Notice that some 9920 // of the store nodes that we found may not be selected for inclusion 9921 // in the wide store. The chain we use needs to be the chain of the 9922 // earliest store node which is *used* and replaced by the wide store. 9923 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 9924 EarliestNodeUsed = i; 9925 } 9926 9927 // The earliest Node in the DAG. 9928 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 9929 SDLoc DL(StoreNodes[0].MemNode); 9930 9931 SDValue StoredVal; 9932 if (UseVector) { 9933 // Find a legal type for the vector store. 9934 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 9935 assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); 9936 StoredVal = DAG.getConstant(0, Ty); 9937 } else { 9938 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 9939 APInt StoreInt(StoreBW, 0); 9940 9941 // Construct a single integer constant which is made of the smaller 9942 // constant inputs. 9943 bool IsLE = TLI.isLittleEndian(); 9944 for (unsigned i = 0; i < NumElem ; ++i) { 9945 unsigned Idx = IsLE ?(NumElem - 1 - i) : i; 9946 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); 9947 SDValue Val = St->getValue(); 9948 StoreInt<<=ElementSizeBytes*8; 9949 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { 9950 StoreInt|=C->getAPIntValue().zext(StoreBW); 9951 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { 9952 StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); 9953 } else { 9954 llvm_unreachable("Invalid constant element type"); 9955 } 9956 } 9957 9958 // Create the new Load and Store operations. 
9959 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9960 StoredVal = DAG.getConstant(StoreInt, StoreTy); 9961 } 9962 9963 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, 9964 FirstInChain->getBasePtr(), 9965 FirstInChain->getPointerInfo(), 9966 false, false, 9967 FirstInChain->getAlignment()); 9968 9969 // Replace the first store with the new store 9970 CombineTo(EarliestOp, NewStore); 9971 // Erase all other stores. 9972 for (unsigned i = 0; i < NumElem ; ++i) { 9973 if (StoreNodes[i].MemNode == EarliestOp) 9974 continue; 9975 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9976 // ReplaceAllUsesWith will replace all uses that existed when it was 9977 // called, but graph optimizations may cause new ones to appear. For 9978 // example, the case in pr14333 looks like 9979 // 9980 // St's chain -> St -> another store -> X 9981 // 9982 // And the only difference from St to the other store is the chain. 9983 // When we change it's chain to be St's chain they become identical, 9984 // get CSEed and the net result is that X is now a use of St. 9985 // Since we know that St is redundant, just iterate. 9986 while (!St->use_empty()) 9987 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); 9988 deleteAndRecombine(St); 9989 } 9990 9991 return true; 9992 } 9993 9994 // Below we handle the case of multiple consecutive stores that 9995 // come from multiple consecutive loads. We merge them into a single 9996 // wide load and a single wide store. 9997 9998 // Look for load nodes which are used by the stored values. 9999 SmallVector<MemOpLink, 8> LoadNodes; 10000 10001 // Find acceptable loads. Loads need to have the same chain (token factor), 10002 // must not be zext, volatile, indexed, and they must be consecutive. 
10003 BaseIndexOffset LdBasePtr; 10004 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 10005 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 10006 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 10007 if (!Ld) break; 10008 10009 // Loads must only have one use. 10010 if (!Ld->hasNUsesOfValue(1, 0)) 10011 break; 10012 10013 // Check that the alignment is the same as the stores. 10014 if (Ld->getAlignment() != St->getAlignment()) 10015 break; 10016 10017 // The memory operands must not be volatile. 10018 if (Ld->isVolatile() || Ld->isIndexed()) 10019 break; 10020 10021 // We do not accept ext loads. 10022 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 10023 break; 10024 10025 // The stored memory type must be the same. 10026 if (Ld->getMemoryVT() != MemVT) 10027 break; 10028 10029 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); 10030 // If this is not the first ptr that we check. 10031 if (LdBasePtr.Base.getNode()) { 10032 // The base ptr must be the same. 10033 if (!LdPtr.equalBaseIndex(LdBasePtr)) 10034 break; 10035 } else { 10036 // Check that all other base pointers are the same as this one. 10037 LdBasePtr = LdPtr; 10038 } 10039 10040 // We found a potential memory operand to merge. 10041 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); 10042 } 10043 10044 if (LoadNodes.size() < 2) 10045 return false; 10046 10047 // If we have load/store pair instructions and we only have two values, 10048 // don't bother. 10049 unsigned RequiredAlignment; 10050 if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && 10051 St->getAlignment() >= RequiredAlignment) 10052 return false; 10053 10054 // Scan the memory operations on the chain and find the first non-consecutive 10055 // load memory address. These variables hold the index in the store node 10056 // array. 10057 unsigned LastConsecutiveLoad = 0; 10058 // This variable refers to the size and not index in the array. 
10059 unsigned LastLegalVectorType = 0; 10060 unsigned LastLegalIntegerType = 0; 10061 StartAddress = LoadNodes[0].OffsetFromBase; 10062 SDValue FirstChain = LoadNodes[0].MemNode->getChain(); 10063 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 10064 // All loads much share the same chain. 10065 if (LoadNodes[i].MemNode->getChain() != FirstChain) 10066 break; 10067 10068 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 10069 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 10070 break; 10071 LastConsecutiveLoad = i; 10072 10073 // Find a legal type for the vector store. 10074 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 10075 if (TLI.isTypeLegal(StoreTy)) 10076 LastLegalVectorType = i + 1; 10077 10078 // Find a legal type for the integer store. 10079 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 10080 StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 10081 if (TLI.isTypeLegal(StoreTy)) 10082 LastLegalIntegerType = i + 1; 10083 // Or check whether a truncstore and extload is legal. 10084 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 10085 TargetLowering::TypePromoteInteger) { 10086 EVT LegalizedStoredValueTy = 10087 TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); 10088 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 10089 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && 10090 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && 10091 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) 10092 LastLegalIntegerType = i+1; 10093 } 10094 } 10095 10096 // Only use vector types if the vector type is larger than the integer type. 10097 // If they are the same, use integers. 
10098 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; 10099 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 10100 10101 // We add +1 here because the LastXXX variables refer to location while 10102 // the NumElem refers to array/index size. 10103 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 10104 NumElem = std::min(LastLegalType, NumElem); 10105 10106 if (NumElem < 2) 10107 return false; 10108 10109 // The earliest Node in the DAG. 10110 unsigned EarliestNodeUsed = 0; 10111 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 10112 for (unsigned i=1; i<NumElem; ++i) { 10113 // Find a chain for the new wide-store operand. Notice that some 10114 // of the store nodes that we found may not be selected for inclusion 10115 // in the wide store. The chain we use needs to be the chain of the 10116 // earliest store node which is *used* and replaced by the wide store. 10117 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 10118 EarliestNodeUsed = i; 10119 } 10120 10121 // Find if it is better to use vectors or integers to load and store 10122 // to memory. 
10123 EVT JointMemOpVT; 10124 if (UseVectorTy) { 10125 JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 10126 } else { 10127 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 10128 JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 10129 } 10130 10131 SDLoc LoadDL(LoadNodes[0].MemNode); 10132 SDLoc StoreDL(StoreNodes[0].MemNode); 10133 10134 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 10135 SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, 10136 FirstLoad->getChain(), 10137 FirstLoad->getBasePtr(), 10138 FirstLoad->getPointerInfo(), 10139 false, false, false, 10140 FirstLoad->getAlignment()); 10141 10142 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, 10143 FirstInChain->getBasePtr(), 10144 FirstInChain->getPointerInfo(), false, false, 10145 FirstInChain->getAlignment()); 10146 10147 // Replace one of the loads with the new load. 10148 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 10149 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 10150 SDValue(NewLoad.getNode(), 1)); 10151 10152 // Remove the rest of the load chains. 10153 for (unsigned i = 1; i < NumElem ; ++i) { 10154 // Replace all chain users of the old load nodes with the chain of the new 10155 // load node. 10156 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 10157 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); 10158 } 10159 10160 // Replace the first store with the new store. 10161 CombineTo(EarliestOp, NewStore); 10162 // Erase all other stores. 10163 for (unsigned i = 0; i < NumElem ; ++i) { 10164 // Remove all Store nodes. 
// (Tail of DAGCombiner::MergeConsecutiveStores: erase the individual stores
// that were folded into the merged wide store; the merged store has already
// replaced the earliest node via CombineTo.)
    if (StoreNodes[i].MemNode == EarliestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // Route chain users around the now-redundant store before deleting it.
    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }

  return true;
}

/// visitSTORE - Fold and simplify a STORE node.  Tries, in order: forwarding
/// a bitcast through the store, eliminating stores of undef, converting FP
/// constant stores to integer stores, inferring better alignment, FP
/// load/store pair conversion, alias-analysis chain improvement, indexed
/// store formation, demanded-bits narrowing of truncating stores, dead-store
/// elimination, consecutive-store merging, and store-width reduction.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    unsigned OrigAlign = ST->getAlignment();
    EVT SVT = Value.getOperand(0).getValueType();
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
    // Only fold if the bitcast source's ABI alignment does not exceed the
    // store's alignment, and a store of the source type is legal (or we are
    // still before legalization and the store is not volatile).
    if (Align <= OrigAlign &&
        ((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                          ST->isNonTemporal(), OrigAlign,
                          ST->getAAInfo());
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
    return Chain;

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
    // NOTE: If the original store is volatile, this transform must not
    // increase the number of stores. For example, on x86-32 an f64 can be
    // stored in one processor operation but an i64 (which is not legal)
    // requires two. So the transform should not be done in this case.
    if (Value.getOpcode() != ISD::TargetConstantFP) {
      SDValue Tmp;
      switch (CFP->getSimpleValueType(0).SimpleTy) {
      default: llvm_unreachable("Unknown FP type");
      case MVT::f16:    // We don't do this for these yet.
      case MVT::f80:
      case MVT::f128:
      case MVT::ppcf128:
        break;
      case MVT::f32:
        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Reinterpret the f32 bits as an i32 constant.
          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), MVT::i32);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }
        break;
      case MVT::f64:
        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
             !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), MVT::i64);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }

        if (!ST->isVolatile() &&
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Many FP stores are not made apparent until after legalize, e.g. for
          // argument passing.  Since this is so common, custom legalize the
          // 64-bit integer store into two 32-bit stores.
          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
          if (TLI.isBigEndian()) std::swap(Lo, Hi);

          unsigned Alignment = ST->getAlignment();
          bool isVolatile = ST->isVolatile();
          bool isNonTemporal = ST->isNonTemporal();
          AAMDNodes AAInfo = ST->getAAInfo();

          // Low half uses the original alignment; the high half at +4 can
          // only assume MinAlign(original, 4).
          SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
                                     Ptr, ST->getPointerInfo(),
                                     isVolatile, isNonTemporal,
                                     ST->getAlignment(), AAInfo);
          Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
                            DAG.getConstant(4, Ptr.getValueType()));
          Alignment = MinAlign(Alignment, 4U);
          SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
                                     Ptr, ST->getPointerInfo().getWithOffset(4),
                                     isVolatile, isNonTemporal,
                                     Alignment, AAInfo);
          // Both halves hang off the original chain; join them with a
          // TokenFactor so neither can be reordered past the other's users.
          return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                             St0, St1);
        }

        break;
      }
    }
  }

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment())
        return DAG.getTruncStore(Chain, SDLoc(N), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align,
                                 ST->getAAInfo());
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  SDValue NewST = TransformFPLoadStorePair(N);
  if (NewST.getNode())
    return NewST;

  // The -combiner-alias-analysis flag, when given, overrides the subtarget
  // default for using AA during DAG combining.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplStore;

      // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
        ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
                                      ST->getMemoryVT(), ST->getMemOperand());
      } else {
        ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
                                 ST->getMemOperand());
      }

      // Create token to keep both nodes around.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplStore);

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Don't add users to work list (the 'false' argument to CombineTo).
      return CombineTo(N, Token, false);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                             APInt::getLowBitsSet(
                               Value.getValueType().getScalarType().getSizeInBits(),
                               ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is a store followed by a store with the same value to the same
  // location, then the store is dead/noop.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
        ST1->isUnindexed() && !ST1->isVolatile()) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.
// (Continuation of DAGCombiner::visitSTORE: FP_ROUND/TRUNCATE-of-store
// folding, consecutive-store merging, and store-width reduction.)
  // We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.  The loop also stops
      // if ST itself was deleted by a successful merge.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  return ReduceLoadOpStoreWidth(N);
}

/// visitINSERT_VECTOR_ELT - Simplify an INSERT_VECTOR_ELT node.
/// Canonicalizes chains of inserts by constant index, and folds an insert
/// into a single-use BUILD_VECTOR (or UNDEF) operand by rebuilding the
/// BUILD_VECTOR with the new element in place.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc dl(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.getOpcode() == ISD::UNDEF)
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt =
      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.getOpcode() == ISD::UNDEF) {
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element.  An out-of-range constant index leaves Ops unchanged
  // (the insert is effectively dropped).
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
// (Continuation of visitINSERT_VECTOR_ELT: extend or truncate the inserted
// value so it matches the BUILD_VECTOR operand type.)
        DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
        DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}

/// ReplaceExtractVectorEltOfLoadWithNarrowedLoad - Replace
/// (extract_vector_elt (load InVecVT), EltNo) with a narrow scalar load from
/// the element's address, RAUW'ing both the extract's value and the original
/// load's chain.  Returns SDValue() if the narrow load would be less aligned
/// than the vector load's ABI requirement or is not legal/custom.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the address of the extracted element: base + EltNo * eltsize,
  // with the offset mirrored for big-endian layouts.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the offset and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    if (TLI.isBigEndian())
      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
    Offset = DAG.getConstant(PtrOff, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: emit the multiply in the DAG; pointer info stays at
    // the vector's base (no known constant offset).
    Offset = DAG.getNode(
        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
    if (TLI.isBigEndian())
      Offset = DAG.getNode(
          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(
        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Result is the same width or narrower: load the element and then
    // truncate or bitcast to the extract's result type.
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
10558 AddToWorklist(EVE); 10559 ++OpsNarrowed; 10560 return SDValue(EVE, 0); 10561} 10562 10563SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 10564 // (vextract (scalar_to_vector val, 0) -> val 10565 SDValue InVec = N->getOperand(0); 10566 EVT VT = InVec.getValueType(); 10567 EVT NVT = N->getValueType(0); 10568 10569 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { 10570 // Check if the result type doesn't match the inserted element type. A 10571 // SCALAR_TO_VECTOR may truncate the inserted element and the 10572 // EXTRACT_VECTOR_ELT may widen the extracted vector. 10573 SDValue InOp = InVec.getOperand(0); 10574 if (InOp.getValueType() != NVT) { 10575 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 10576 return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); 10577 } 10578 return InOp; 10579 } 10580 10581 SDValue EltNo = N->getOperand(1); 10582 bool ConstEltNo = isa<ConstantSDNode>(EltNo); 10583 10584 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. 10585 // We only perform this optimization before the op legalization phase because 10586 // we may introduce new vector instructions which are not backed by TD 10587 // patterns. For example on AVX, extracting elements from a wide vector 10588 // without using extract_subvector. However, if we can find an underlying 10589 // scalar value, then we can always use that. 10590 if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE 10591 && ConstEltNo) { 10592 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 10593 int NumElem = VT.getVectorNumElements(); 10594 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); 10595 // Find the new index to extract from. 10596 int OrigElt = SVOp->getMaskElt(Elt); 10597 10598 // Extracting an undef index is undef. 10599 if (OrigElt == -1) 10600 return DAG.getUNDEF(NVT); 10601 10602 // Select the right vector half to extract from. 
10603 SDValue SVInVec; 10604 if (OrigElt < NumElem) { 10605 SVInVec = InVec->getOperand(0); 10606 } else { 10607 SVInVec = InVec->getOperand(1); 10608 OrigElt -= NumElem; 10609 } 10610 10611 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { 10612 SDValue InOp = SVInVec.getOperand(OrigElt); 10613 if (InOp.getValueType() != NVT) { 10614 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 10615 InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); 10616 } 10617 10618 return InOp; 10619 } 10620 10621 // FIXME: We should handle recursing on other vector shuffles and 10622 // scalar_to_vector here as well. 10623 10624 if (!LegalOperations) { 10625 EVT IndexTy = TLI.getVectorIdxTy(); 10626 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, 10627 SVInVec, DAG.getConstant(OrigElt, IndexTy)); 10628 } 10629 } 10630 10631 bool BCNumEltsChanged = false; 10632 EVT ExtVT = VT.getVectorElementType(); 10633 EVT LVT = ExtVT; 10634 10635 // If the result of load has to be truncated, then it's not necessarily 10636 // profitable. 10637 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) 10638 return SDValue(); 10639 10640 if (InVec.getOpcode() == ISD::BITCAST) { 10641 // Don't duplicate a load with other uses. 
10642 if (!InVec.hasOneUse()) 10643 return SDValue(); 10644 10645 EVT BCVT = InVec.getOperand(0).getValueType(); 10646 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 10647 return SDValue(); 10648 if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) 10649 BCNumEltsChanged = true; 10650 InVec = InVec.getOperand(0); 10651 ExtVT = BCVT.getVectorElementType(); 10652 } 10653 10654 // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) 10655 if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && 10656 ISD::isNormalLoad(InVec.getNode()) && 10657 !N->getOperand(1)->hasPredecessor(InVec.getNode())) { 10658 SDValue Index = N->getOperand(1); 10659 if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) 10660 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, 10661 OrigLoad); 10662 } 10663 10664 // Perform only after legalization to ensure build_vector / vector_shuffle 10665 // optimizations have already been done. 10666 if (!LegalOperations) return SDValue(); 10667 10668 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 10669 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 10670 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 10671 10672 if (ConstEltNo) { 10673 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 10674 10675 LoadSDNode *LN0 = nullptr; 10676 const ShuffleVectorSDNode *SVN = nullptr; 10677 if (ISD::isNormalLoad(InVec.getNode())) { 10678 LN0 = cast<LoadSDNode>(InVec); 10679 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && 10680 InVec.getOperand(0).getValueType() == ExtVT && 10681 ISD::isNormalLoad(InVec.getOperand(0).getNode())) { 10682 // Don't duplicate a load with other uses. 
      // Don't fold if the scalar_to_vector's input load has other users;
      // the transform would duplicate the load.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      // NOTE(review): the guard uses 'Elt > (int)NumElems', but Elt ==
      // NumElems is also out of range for a mask with NumElems entries, so
      // '>=' looks intended here — TODO confirm.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      // Shuffle mask values < NumElems select from the first operand, values
      // >= NumElems select from the second.  A negative (undef) Idx takes the
      // first branch and is caught by the Elt == -1 undef check further down.
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the extract index into the selected shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    // Replace the vector load + extract with a scalar load of the element.
    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
//
// If every defined operand of the BUILD_VECTOR is a zero_extend or any_extend
// from one common narrow scalar type, rebuild the vector in the narrow type
// (filling the extra lanes with zero or undef) and bitcast back to the
// original type.  The simpler BUILD_VECTOR often folds further.
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.getOpcode() == ISD::UNDEF) continue;

    bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = TLI.isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Zero-extends require the padding lanes to be zero; if every extend was an
  // any_extend the padding lanes may be undef.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // The narrow value occupies the low sub-lane of each wide lane on
    // little-endian targets and the high sub-lane on big-endian targets.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

// Fold (build_vector ([su]int_to_fp x0), ..., ([su]int_to_fp xN)) into
// ([su]int_to_fp (build_vector x0, ..., xN)) when the vectorized conversion
// is legal or custom for the target.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE serves as the "no conversion opcode seen yet" sentinel.
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    // Every defined element must use the same conversion opcode.
    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Build the integer vector of conversion sources, keeping undef lanes undef.
  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.getOpcode() == ISD::UNDEF)
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
  AddToWorklist(BV.getNode());

  // Convert the whole vector at once.
  return DAG.getNode(Opcode, dl, VT, BV);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  SDValue V = reduceBuildVecExtToExtBuildVec(N);
  if (V.getNode())
    return V;

  V = reduceBuildVecConvertToConvertBuildVec(N);
  if (V.getNode())
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
10937 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) 10938 return SDValue(); 10939 10940 SDValue VecIn1, VecIn2; 10941 bool UsesZeroVector = false; 10942 for (unsigned i = 0; i != NumInScalars; ++i) { 10943 SDValue Op = N->getOperand(i); 10944 // Ignore undef inputs. 10945 if (Op.getOpcode() == ISD::UNDEF) continue; 10946 10947 // See if we can combine this build_vector into a blend with a zero vector. 10948 if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && 10949 cast<ConstantSDNode>(Op.getNode())->isNullValue()) || 10950 (Op.getOpcode() == ISD::ConstantFP && 10951 cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { 10952 UsesZeroVector = true; 10953 continue; 10954 } 10955 10956 // If this input is something other than a EXTRACT_VECTOR_ELT with a 10957 // constant index, bail out. 10958 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10959 !isa<ConstantSDNode>(Op.getOperand(1))) { 10960 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10961 break; 10962 } 10963 10964 // We allow up to two distinct input vectors. 10965 SDValue ExtractedFromVec = Op.getOperand(0); 10966 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 10967 continue; 10968 10969 if (!VecIn1.getNode()) { 10970 VecIn1 = ExtractedFromVec; 10971 } else if (!VecIn2.getNode() && !UsesZeroVector) { 10972 VecIn2 = ExtractedFromVec; 10973 } else { 10974 // Too many inputs. 10975 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10976 break; 10977 } 10978 } 10979 10980 // If everything is good, we can make a shuffle operation. 10981 if (VecIn1.getNode()) { 10982 unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); 10983 SmallVector<int, 8> Mask; 10984 for (unsigned i = 0; i != NumInScalars; ++i) { 10985 unsigned Opcode = N->getOperand(i).getOpcode(); 10986 if (Opcode == ISD::UNDEF) { 10987 Mask.push_back(-1); 10988 continue; 10989 } 10990 10991 // Operands can also be zero. 
10992 if (Opcode != ISD::EXTRACT_VECTOR_ELT) { 10993 assert(UsesZeroVector && 10994 (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && 10995 "Unexpected node found!"); 10996 Mask.push_back(NumInScalars+i); 10997 continue; 10998 } 10999 11000 // If extracting from the first vector, just use the index directly. 11001 SDValue Extract = N->getOperand(i); 11002 SDValue ExtVal = Extract.getOperand(1); 11003 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 11004 if (Extract.getOperand(0) == VecIn1) { 11005 Mask.push_back(ExtIndex); 11006 continue; 11007 } 11008 11009 // Otherwise, use InIdx + InputVecSize 11010 Mask.push_back(InNumElements + ExtIndex); 11011 } 11012 11013 // Avoid introducing illegal shuffles with zero. 11014 if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) 11015 return SDValue(); 11016 11017 // We can't generate a shuffle node with mismatched input and output types. 11018 // Attempt to transform a single input vector to the correct type. 11019 if ((VT != VecIn1.getValueType())) { 11020 // If the input vector type has a different base type to the output 11021 // vector type, bail out. 11022 EVT VTElemType = VT.getVectorElementType(); 11023 if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || 11024 (VecIn2.getNode() && 11025 (VecIn2.getValueType().getVectorElementType() != VTElemType))) 11026 return SDValue(); 11027 11028 // If the input vector is too small, widen it. 11029 // We only support widening of vectors which are half the size of the 11030 // output registers. For example XMM->YMM widening on X86 with AVX. 11031 EVT VecInT = VecIn1.getValueType(); 11032 if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { 11033 // If we only have one small input, widen it by adding undef values. 
11034 if (!VecIn2.getNode()) 11035 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, 11036 DAG.getUNDEF(VecIn1.getValueType())); 11037 else if (VecIn1.getValueType() == VecIn2.getValueType()) { 11038 // If we have two small inputs of the same type, try to concat them. 11039 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); 11040 VecIn2 = SDValue(nullptr, 0); 11041 } else 11042 return SDValue(); 11043 } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { 11044 // If the input vector is too large, try to split it. 11045 // We don't support having two input vectors that are too large.
| 2791 } 2792 } 2793 2794 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2795 // actually legal and isn't going to get expanded, else this is a false 2796 // optimisation. 2797 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2798 Load->getValueType(0), 2799 Load->getMemoryVT()); 2800 2801 // Resize the constant to the same size as the original memory access before 2802 // extension. If it is still the AllOnesValue then this AND is completely 2803 // unneeded. 2804 Constant = 2805 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2806 2807 bool B; 2808 switch (Load->getExtensionType()) { 2809 default: B = false; break; 2810 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2811 case ISD::ZEXTLOAD: 2812 case ISD::NON_EXTLOAD: B = true; break; 2813 } 2814 2815 if (B && Constant.isAllOnesValue()) { 2816 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2817 // preserve semantics once we get rid of the AND. 2818 SDValue NewLoad(Load, 0); 2819 if (Load->getExtensionType() == ISD::EXTLOAD) { 2820 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2821 Load->getValueType(0), SDLoc(Load), 2822 Load->getChain(), Load->getBasePtr(), 2823 Load->getOffset(), Load->getMemoryVT(), 2824 Load->getMemOperand()); 2825 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2826 if (Load->getNumValues() == 3) { 2827 // PRE/POST_INC loads have 3 values. 2828 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2829 NewLoad.getValue(2) }; 2830 CombineTo(Load, To, 3, true); 2831 } else { 2832 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2833 } 2834 } 2835 2836 // Fold the AND away, taking care not to fold to the old load node if we 2837 // replaced it. 2838 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2839 2840 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2841 } 2842 } 2843 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2844 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2845 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2846 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2847 2848 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2849 LL.getValueType().isInteger()) { 2850 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2851 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2852 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2853 LR.getValueType(), LL, RL); 2854 AddToWorklist(ORNode.getNode()); 2855 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2856 } 2857 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2858 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2859 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2860 LR.getValueType(), LL, RL); 2861 AddToWorklist(ANDNode.getNode()); 2862 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 2863 } 2864 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2865 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2866 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2867 LR.getValueType(), LL, RL); 2868 AddToWorklist(ORNode.getNode()); 2869 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2870 } 2871 } 2872 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2873 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2874 Op0 == Op1 && LL.getValueType().isInteger() && 2875 Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && 2876 cast<ConstantSDNode>(RR)->isAllOnesValue()) || 2877 (cast<ConstantSDNode>(LR)->isAllOnesValue() && 2878 cast<ConstantSDNode>(RR)->isNullValue()))) { 2879 SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), 2880 LL, DAG.getConstant(1, LL.getValueType())); 2881 AddToWorklist(ADDNode.getNode()); 2882 
return DAG.getSetCC(SDLoc(N), VT, ADDNode, 2883 DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); 2884 } 2885 // canonicalize equivalent to ll == rl 2886 if (LL == RR && LR == RL) { 2887 Op1 = ISD::getSetCCSwappedOperands(Op1); 2888 std::swap(RL, RR); 2889 } 2890 if (LL == RL && LR == RR) { 2891 bool isInteger = LL.getValueType().isInteger(); 2892 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2893 if (Result != ISD::SETCC_INVALID && 2894 (!LegalOperations || 2895 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2896 TLI.isOperationLegal(ISD::SETCC, 2897 getSetCCResultType(N0.getSimpleValueType()))))) 2898 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 2899 LL, LR, Result); 2900 } 2901 } 2902 2903 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2904 if (N0.getOpcode() == N1.getOpcode()) { 2905 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2906 if (Tmp.getNode()) return Tmp; 2907 } 2908 2909 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2910 // fold (and (sra)) -> (and (srl)) when possible. 2911 if (!VT.isVector() && 2912 SimplifyDemandedBits(SDValue(N, 0))) 2913 return SDValue(N, 0); 2914 2915 // fold (zext_inreg (extload x)) -> (zextload x) 2916 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2917 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2918 EVT MemVT = LN0->getMemoryVT(); 2919 // If we zero all the possible extended bits, then we can turn this into 2920 // a zextload if we are running before legalize or the operation is legal. 
2921 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2922 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2923 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2924 ((!LegalOperations && !LN0->isVolatile()) || 2925 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 2926 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2927 LN0->getChain(), LN0->getBasePtr(), 2928 MemVT, LN0->getMemOperand()); 2929 AddToWorklist(N); 2930 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2931 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2932 } 2933 } 2934 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2935 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2936 N0.hasOneUse()) { 2937 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2938 EVT MemVT = LN0->getMemoryVT(); 2939 // If we zero all the possible extended bits, then we can turn this into 2940 // a zextload if we are running before legalize or the operation is legal. 2941 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2942 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2943 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2944 ((!LegalOperations && !LN0->isVolatile()) || 2945 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 2946 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2947 LN0->getChain(), LN0->getBasePtr(), 2948 MemVT, LN0->getMemOperand()); 2949 AddToWorklist(N); 2950 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2951 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2952 } 2953 } 2954 2955 // fold (and (load x), 255) -> (zextload x, i8) 2956 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2957 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2958 if (N1C && (N0.getOpcode() == ISD::LOAD || 2959 (N0.getOpcode() == ISD::ANY_EXTEND && 2960 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2961 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2962 LoadSDNode *LN0 = HasAnyExt 2963 ? cast<LoadSDNode>(N0.getOperand(0)) 2964 : cast<LoadSDNode>(N0); 2965 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2966 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 2967 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2968 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2969 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2970 EVT LoadedVT = LN0->getMemoryVT(); 2971 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2972 2973 if (ExtVT == LoadedVT && 2974 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2975 ExtVT))) { 2976 2977 SDValue NewLoad = 2978 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 2979 LN0->getChain(), LN0->getBasePtr(), ExtVT, 2980 LN0->getMemOperand()); 2981 AddToWorklist(N); 2982 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2983 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2984 } 2985 2986 // Do not change the width of a volatile load. 2987 // Do not generate loads of non-round integer types since these can 2988 // be expensive (and would be wrong if the type is not byte sized). 
2989 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2990 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2991 ExtVT))) { 2992 EVT PtrType = LN0->getOperand(1).getValueType(); 2993 2994 unsigned Alignment = LN0->getAlignment(); 2995 SDValue NewPtr = LN0->getBasePtr(); 2996 2997 // For big endian targets, we need to add an offset to the pointer 2998 // to load the correct bytes. For little endian systems, we merely 2999 // need to read fewer bytes from the same pointer. 3000 if (TLI.isBigEndian()) { 3001 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 3002 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 3003 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 3004 NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, 3005 NewPtr, DAG.getConstant(PtrOff, PtrType)); 3006 Alignment = MinAlign(Alignment, PtrOff); 3007 } 3008 3009 AddToWorklist(NewPtr.getNode()); 3010 3011 SDValue Load = 3012 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3013 LN0->getChain(), NewPtr, 3014 LN0->getPointerInfo(), 3015 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 3016 LN0->isInvariant(), Alignment, LN0->getAAInfo()); 3017 AddToWorklist(N); 3018 CombineTo(LN0, Load, Load.getValue(1)); 3019 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3020 } 3021 } 3022 } 3023 } 3024 3025 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 3026 VT.getSizeInBits() <= 64) { 3027 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 3028 APInt ADDC = ADDI->getAPIntValue(); 3029 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 3030 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 3031 // immediate for an add, but it is legal if its top c2 bits are set, 3032 // transform the ADD so the immediate doesn't need to be materialized 3033 // in a register. 
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
          // The SRL guarantees the top SRLI bits of the AND result are zero,
          // so setting those bits in the ADD immediate cannot change the
          // final masked value.
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLI->getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, SDLoc(N0), VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, VT));
              CombineTo(N0.getNode(), NewAdd);
              return SDValue(N, 0); // Return N so it doesn't get rechecked!
            }
          }
        }
      }
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return BSwap;
  }

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// N is the OR (or AND-of-OR) node being combined; N0/N1 are its two halves.
/// When DemandHighBits is true the caller needs all bits above the low
/// halfword to be correct (zero); when false only the low 16 bits matter.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // Run only after legalization so BSWAP legality below is meaningful.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  // LookPassAnd* record that an outer AND mask was peeled off each half.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize: the (and (shl ...))/0xFF00-masked half goes to N0 and the
  // (and (srl ...))/0xFF-masked half goes to N1.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After mask-peeling, require exactly one SHL-by-8 and one SRL-by-8 half.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must shift the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit (bswap a), then shift the swapped halfword down into the low 16 bits
  // for types wider than i16.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16)
    Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// On success the source node for byte lane Num is recorded in Parts[Num];
/// a lane that has already been seen makes the match fail.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!N1C)
    return false;

  // Map the mask (for AND) or the implied byte lane to a Parts slot.
  unsigned Num;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       Num = 0; break;
  case 0xFF00:     Num = 1; break;
  case 0xFF0000:   Num = 2; break;
  case 0xFF000000: Num = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  SDValue N0 = N.getOperand(0);
  if (Opc == ISD::AND) {
    // AND is outermost: the inner node must be the matching 8-bit shift.
    if (Num == 0 || Num == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (N0.getOpcode() != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (N0.getOpcode() != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (Num != 0 && Num != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (Num != 1 && Num != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Reject a second element claiming the same byte lane.
  if (Parts[Num])
    return false;

  // Record the ultimate source 'x' for this lane; the caller checks that all
  // four lanes agree on it.
  Parts[Num] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Run only after legalization so the BSWAP legality check is meaningful.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] collects the source node for byte lane i, filled in by
  // isBSwapHWordElement.
  SDNode *Parts[4] = {};

  // The operand-count checks guard the getOperand(1) accesses below.
  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();

    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
    SDValue N010 = N01.getOperand(0);
    if (!isBSwapHWordElement(N010, Parts))
      return SDValue();
    SDValue N011 = N01.getOperand(1);
    if (!isBSwapHWordElement(N011, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
3299 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 3300 return SDValue(); 3301 3302 SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, 3303 SDValue(Parts[0],0)); 3304 3305 // Result of the bswap should be rotated by 16. If it's not legal, then 3306 // do (x << 16) | (x >> 16). 3307 SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); 3308 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 3309 return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); 3310 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 3311 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); 3312 return DAG.getNode(ISD::OR, SDLoc(N), VT, 3313 DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), 3314 DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); 3315} 3316 3317SDValue DAGCombiner::visitOR(SDNode *N) { 3318 SDValue N0 = N->getOperand(0); 3319 SDValue N1 = N->getOperand(1); 3320 SDValue LL, LR, RL, RR, CC0, CC1; 3321 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3322 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3323 EVT VT = N1.getValueType(); 3324 3325 // fold vector ops 3326 if (VT.isVector()) { 3327 SDValue FoldedVOp = SimplifyVBinOp(N); 3328 if (FoldedVOp.getNode()) return FoldedVOp; 3329 3330 // fold (or x, 0) -> x, vector edition 3331 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3332 return N1; 3333 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3334 return N0; 3335 3336 // fold (or x, -1) -> -1, vector edition 3337 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3338 // do not return N0, because undef node may exist in N0 3339 return DAG.getConstant( 3340 APInt::getAllOnesValue( 3341 N0.getValueType().getScalarType().getSizeInBits()), 3342 N0.getValueType()); 3343 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3344 // do not return N1, because undef node may exist in N1 3345 return DAG.getConstant( 3346 APInt::getAllOnesValue( 3347 N1.getValueType().getScalarType().getSizeInBits()), 3348 N1.getValueType()); 3349 3350 // fold (or (shuf A, 
V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) 3351 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) 3352 // Do this only if the resulting shuffle is legal. 3353 if (isa<ShuffleVectorSDNode>(N0) && 3354 isa<ShuffleVectorSDNode>(N1) && 3355 // Avoid folding a node with illegal type. 3356 TLI.isTypeLegal(VT) && 3357 N0->getOperand(1) == N1->getOperand(1) && 3358 ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { 3359 bool CanFold = true; 3360 unsigned NumElts = VT.getVectorNumElements(); 3361 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); 3362 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); 3363 // We construct two shuffle masks: 3364 // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand 3365 // and N1 as the second operand. 3366 // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand 3367 // and N0 as the second operand. 3368 // We do this because OR is commutable and therefore there might be 3369 // two ways to fold this node into a shuffle. 3370 SmallVector<int,4> Mask1; 3371 SmallVector<int,4> Mask2; 3372 3373 for (unsigned i = 0; i != NumElts && CanFold; ++i) { 3374 int M0 = SV0->getMaskElt(i); 3375 int M1 = SV1->getMaskElt(i); 3376 3377 // Both shuffle indexes are undef. Propagate Undef. 3378 if (M0 < 0 && M1 < 0) { 3379 Mask1.push_back(M0); 3380 Mask2.push_back(M0); 3381 continue; 3382 } 3383 3384 if (M0 < 0 || M1 < 0 || 3385 (M0 < (int)NumElts && M1 < (int)NumElts) || 3386 (M0 >= (int)NumElts && M1 >= (int)NumElts)) { 3387 CanFold = false; 3388 break; 3389 } 3390 3391 Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); 3392 Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); 3393 } 3394 3395 if (CanFold) { 3396 // Fold this sequence only if the resulting shuffle is 'legal'. 
3397 if (TLI.isShuffleMaskLegal(Mask1, VT)) 3398 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), 3399 N1->getOperand(0), &Mask1[0]); 3400 if (TLI.isShuffleMaskLegal(Mask2, VT)) 3401 return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), 3402 N0->getOperand(0), &Mask2[0]); 3403 } 3404 } 3405 } 3406 3407 // fold (or x, undef) -> -1 3408 if (!LegalOperations && 3409 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { 3410 EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; 3411 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); 3412 } 3413 // fold (or c1, c2) -> c1|c2 3414 if (N0C && N1C) 3415 return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); 3416 // canonicalize constant to RHS 3417 if (N0C && !N1C) 3418 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); 3419 // fold (or x, 0) -> x 3420 if (N1C && N1C->isNullValue()) 3421 return N0; 3422 // fold (or x, -1) -> -1 3423 if (N1C && N1C->isAllOnesValue()) 3424 return N1; 3425 // fold (or x, c) -> c iff (x & ~c) == 0 3426 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 3427 return N1; 3428 3429 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 3430 SDValue BSwap = MatchBSwapHWord(N, N0, N1); 3431 if (BSwap.getNode()) 3432 return BSwap; 3433 BSwap = MatchBSwapHWordLow(N, N0, N1); 3434 if (BSwap.getNode()) 3435 return BSwap; 3436 3437 // reassociate or 3438 SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); 3439 if (ROR.getNode()) 3440 return ROR; 3441 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 3442 // iff (c1 & c2) == 0. 
3443 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3444 isa<ConstantSDNode>(N0.getOperand(1))) { 3445 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 3446 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { 3447 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) 3448 return DAG.getNode( 3449 ISD::AND, SDLoc(N), VT, 3450 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); 3451 return SDValue(); 3452 } 3453 } 3454 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 3455 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 3456 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 3457 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 3458 3459 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 3460 LL.getValueType().isInteger()) { 3461 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 3462 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 3463 if (cast<ConstantSDNode>(LR)->isNullValue() && 3464 (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 3465 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), 3466 LR.getValueType(), LL, RL); 3467 AddToWorklist(ORNode.getNode()); 3468 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 3469 } 3470 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 3471 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 3472 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && 3473 (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 3474 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), 3475 LR.getValueType(), LL, RL); 3476 AddToWorklist(ANDNode.getNode()); 3477 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 3478 } 3479 } 3480 // canonicalize equivalent to ll == rl 3481 if (LL == RR && LR == RL) { 3482 Op1 = ISD::getSetCCSwappedOperands(Op1); 3483 std::swap(RL, RR); 3484 } 3485 if (LL == RL && LR == RR) { 3486 bool isInteger = LL.getValueType().isInteger(); 3487 ISD::CondCode Result = 
ISD::getSetCCOrOperation(Op0, Op1, isInteger); 3488 if (Result != ISD::SETCC_INVALID && 3489 (!LegalOperations || 3490 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 3491 TLI.isOperationLegal(ISD::SETCC, 3492 getSetCCResultType(N0.getValueType()))))) 3493 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 3494 LL, LR, Result); 3495 } 3496 } 3497 3498 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 3499 if (N0.getOpcode() == N1.getOpcode()) { 3500 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 3501 if (Tmp.getNode()) return Tmp; 3502 } 3503 3504 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 3505 if (N0.getOpcode() == ISD::AND && 3506 N1.getOpcode() == ISD::AND && 3507 N0.getOperand(1).getOpcode() == ISD::Constant && 3508 N1.getOperand(1).getOpcode() == ISD::Constant && 3509 // Don't increase # computations. 3510 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3511 // We can only do this xform if we know that bits from X that are set in C2 3512 // but not in C1 are already zero. Likewise for Y. 3513 const APInt &LHSMask = 3514 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 3515 const APInt &RHSMask = 3516 cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); 3517 3518 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 3519 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 3520 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3521 N0.getOperand(0), N1.getOperand(0)); 3522 return DAG.getNode(ISD::AND, SDLoc(N), VT, X, 3523 DAG.getConstant(LHSMask | RHSMask, VT)); 3524 } 3525 } 3526 3527 // See if this is some rotate idiom. 3528 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 3529 return SDValue(Rot, 0); 3530 3531 // Simplify the operands using demanded-bits information. 3532 if (!VT.isVector() && 3533 SimplifyDemandedBits(SDValue(N, 0))) 3534 return SDValue(N, 0); 3535 3536 return SDValue(); 3537} 3538 3539/// Match "(X shl/srl V1) & V2" where V2 may not be present. 
3540static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3541 if (Op.getOpcode() == ISD::AND) { 3542 if (isa<ConstantSDNode>(Op.getOperand(1))) { 3543 Mask = Op.getOperand(1); 3544 Op = Op.getOperand(0); 3545 } else { 3546 return false; 3547 } 3548 } 3549 3550 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3551 Shift = Op; 3552 return true; 3553 } 3554 3555 return false; 3556} 3557 3558// Return true if we can prove that, whenever Neg and Pos are both in the 3559// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that 3560// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3561// 3562// (or (shift1 X, Neg), (shift2 X, Pos)) 3563// 3564// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3565// in direction shift1 by Neg. The range [0, OpSize) means that we only need 3566// to consider shift amounts with defined behavior. 3567static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { 3568 // If OpSize is a power of 2 then: 3569 // 3570 // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) 3571 // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). 3572 // 3573 // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check 3574 // for the stronger condition: 3575 // 3576 // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] 3577 // 3578 // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) 3579 // we can just replace Neg with Neg' for the rest of the function. 3580 // 3581 // In other cases we check for the even stronger condition: 3582 // 3583 // Neg == OpSize - Pos [B] 3584 // 3585 // for all Neg and Pos. Note that the (or ...) then invokes undefined 3586 // behavior if Pos == 0 (and consequently Neg == OpSize). 
3587 // 3588 // We could actually use [A] whenever OpSize is a power of 2, but the 3589 // only extra cases that it would match are those uninteresting ones 3590 // where Neg and Pos are never in range at the same time. E.g. for 3591 // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3592 // as well as (sub 32, Pos), but: 3593 // 3594 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3595 // 3596 // always invokes undefined behavior for 32-bit X. 3597 // 3598 // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. 3599 unsigned MaskLoBits = 0; 3600 if (Neg.getOpcode() == ISD::AND && 3601 isPowerOf2_64(OpSize) && 3602 Neg.getOperand(1).getOpcode() == ISD::Constant && 3603 cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { 3604 Neg = Neg.getOperand(0); 3605 MaskLoBits = Log2_64(OpSize); 3606 } 3607 3608 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 3609 if (Neg.getOpcode() != ISD::SUB) 3610 return 0; 3611 ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); 3612 if (!NegC) 3613 return 0; 3614 SDValue NegOp1 = Neg.getOperand(1); 3615 3616 // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with 3617 // Pos'. The truncation is redundant for the purpose of the equality. 3618 if (MaskLoBits && 3619 Pos.getOpcode() == ISD::AND && 3620 Pos.getOperand(1).getOpcode() == ISD::Constant && 3621 cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) 3622 Pos = Pos.getOperand(0); 3623 3624 // The condition we need is now: 3625 // 3626 // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask 3627 // 3628 // If NegOp1 == Pos then we need: 3629 // 3630 // OpSize & Mask == NegC & Mask 3631 // 3632 // (because "x & Mask" is a truncation and distributes through subtraction). 3633 APInt Width; 3634 if (Pos == NegOp1) 3635 Width = NegC->getAPIntValue(); 3636 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 
3637 // Then the condition we want to prove becomes: 3638 // 3639 // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask 3640 // 3641 // which, again because "x & Mask" is a truncation, becomes: 3642 // 3643 // NegC & Mask == (OpSize - PosC) & Mask 3644 // OpSize & Mask == (NegC + PosC) & Mask 3645 else if (Pos.getOpcode() == ISD::ADD && 3646 Pos.getOperand(0) == NegOp1 && 3647 Pos.getOperand(1).getOpcode() == ISD::Constant) 3648 Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + 3649 NegC->getAPIntValue()); 3650 else 3651 return false; 3652 3653 // Now we just need to check that OpSize & Mask == Width & Mask. 3654 if (MaskLoBits) 3655 // Opsize & Mask is 0 since Mask is Opsize - 1. 3656 return Width.getLoBits(MaskLoBits) == 0; 3657 return Width == OpSize; 3658} 3659 3660// A subroutine of MatchRotate used once we have found an OR of two opposite 3661// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces 3662// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3663// former being preferred if supported. InnerPos and InnerNeg are Pos and 3664// Neg with outer conversions stripped away. 3665SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3666 SDValue Neg, SDValue InnerPos, 3667 SDValue InnerNeg, unsigned PosOpcode, 3668 unsigned NegOpcode, SDLoc DL) { 3669 // fold (or (shl x, (*ext y)), 3670 // (srl x, (*ext (sub 32, y)))) -> 3671 // (rotl x, y) or (rotr x, (sub 32, y)) 3672 // 3673 // fold (or (shl x, (*ext (sub 32, y))), 3674 // (srl x, (*ext y))) -> 3675 // (rotr x, y) or (rotl x, (sub 32, y)) 3676 EVT VT = Shifted.getValueType(); 3677 if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { 3678 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 3679 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 3680 HasPos ? Pos : Neg).getNode(); 3681 } 3682 3683 return nullptr; 3684} 3685 3686// MatchRotate - Handle an 'or' of two operands. 
// If this is one of the many
// idioms for rotate, and if the target supports rotation instructions,
// generate a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // The two constant shift amounts must cover the full width exactly.
    if ((LShVal + RShVal) != OpSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      // Each side's mask only constrained the bits that side produced; the
      // bits supplied by the other side are left unmasked (set in RHSBits /
      // LHSBits below).
      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both orientations: shl amount as "Pos" first, then srl amount.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}

/// Visit an ISD::XOR node, applying the XOR-specific combines in order.
/// Returns the replacement value, SDValue(N, 0) if N was updated in place,
/// or a null SDValue if no combine applied.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
  if (RXOR.getNode())
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: flip AND<->OR and invert both operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // We can't and shouldn't fold opaque constants.
  if (Amt->isOpaque())
    return SDValue();

  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}

/// Push a truncate through an AND with a splat-constant mask:
/// (truncate (and x, c)) -> (and (truncate x), (truncate c)).
/// Only fires when both the truncate and the AND have a single use.
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
    SDValue N01 = N->getOperand(0).getOperand(1);

    if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
      EVT TruncVT = N->getValueType(0);
      SDValue N00 = N->getOperand(0).getOperand(0);
      APInt TruncC = N01C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());

      return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
                         DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
                         DAG.getConstant(TruncC, TruncVT));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitRotate(SDNode *N) {
  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  // (Continuation of the preceding visitor, whose start is above this chunk.)
  // fold (rot-like-op x, (trunc (and y, c))) by narrowing the AND through the
  // truncate of the shift amount.
  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
    if (NewOp1.getNode())
      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
                         N->getOperand(0), NewOp1);
  }
  return SDValue();
}

/// Combine an ISD::SHL node.  Applies a chain of local algebraic folds and
/// returns the replacement value, or an empty SDValue if nothing applied.
/// The ordering of the folds matters: earlier, cheaper folds short-circuit
/// later ones via the early returns.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        // NOTE(review): N1C is refreshed with the splat constant only on this
        // else-path (non-AND operand); on the AND path above, later N1C-guarded
        // folds see the original dyn_cast result — confirm this is intended.
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // Combined shift of >= the bit width leaves nothing behind.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // OpSizeInBits - InnerShiftSize is the number of bits the ext added.
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, VT);
        return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        // Only handled when both shift amounts match (srl then shl by the
        // same amount, i.e. a mask of the low bits in the narrow type).
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
                                       NewOp0, DAG.getConstant(c2, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask keeps the bits that survive the srl; it is then adjusted in
        // the direction of the residual shift below.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, N1.getValueType()));
        }
        return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
                           DAG.getConstant(Mask, VT));
      }
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(BitSize,
                                            BitSize - N1C->getZExtValue()), VT);
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  APInt Val;
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isa<ConstantSDNode>(N0.getOperand(1)) ||
       isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // Generic shift-by-constant combines shared with SRA/SRL.
  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C);
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}

/// Combine an ISD::SRA (arithmetic shift right) node.  Returns the
/// replacement value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // For vectors, a splat-of-constant shift amount counts as a constant.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> (sra c1, c2)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1
  if (N0C && N0C->isAllOnesValue())
    return N0;
  // fold (sra x, (setge c, size(x))) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // LowBits is the width of the value that survives the shl/sra pair.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // An arithmetic shift by >= the bit width saturates to width-1
      // (it just replicates the sign bit).
      if (Sum >= OpSizeInBits)
        Sum = OpSizeInBits - 1;
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constanst of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDValue Amt = DAG.getConstant(ShiftAmt,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // NOTE(review): the residual shift is emitted as SRL — the bits it
        // shifts out are discarded by the truncate below, so logical vs
        // arithmetic makes no difference here.
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Generic shift-by-constant combines shared with SHL/SRL.
  if (N1C) {
    SDValue NewSRA = visitShiftByConstant(N, N1C);
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}

/// Combine an ISD::SRL (logical shift right) node.  Returns the replacement
/// value, or an empty SDValue if no fold applied.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // For vectors, a splat-of-constant shift amount counts as a constant.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode
          *N01C = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N01C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // Combined logical shift of >= the bit width produces zero.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
                         DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    if (BitSize <= 64) {
      // ~0ULL >> ShAmt yields a mask of the (BitSize - c) low bits.
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the bits that came from the undef high part of the anyext.
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
                         DAG.getConstant(Mask, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      return DAG.getNode(ISD::XOR, SDLoc(N), VT,
                         Op, DAG.getConstant(1, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Generic shift-by-constant combines shared with SHL/SRA.
  if (N1C) {
    SDValue NewSRL = visitShiftByConstant(N, N1C);
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further.
  // Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}

/// Combine an ISD::CTLZ node: re-emit it when the operand is a constant so
/// the DAG builder constant-folds it to the count-leading-zeros value.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine an ISD::CTLZ_ZERO_UNDEF node: constant-fold via getNode.
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine an ISD::CTTZ node: constant-fold via getNode.
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine an ISD::CTTZ_ZERO_UNDEF node: constant-fold via getNode.
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine an ISD::CTPOP node: constant-fold via getNode.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}


/// \brief Generate Min/Max node
static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS,
                                   SDValue RHS,
                                   SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // Only handle the canonical select-of-compared-operands shape; anything
  // else is not a min/max.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // "less" compare: selecting the LHS picks the smaller value (FMINNUM);
    // the swapped select picks the larger (FMAXNUM).
    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegal(Opcode, VT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}

/// Combine an ISD::SELECT node.  Applies boolean-algebra folds for i1
/// selects, min/max formation from a SETCC condition, and SELECT->SELECT_CC
/// conversion.  Returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 || (VT0.isInteger() &&
                          TLI.getBooleanContents(false, false) ==
                              TLI.getBooleanContents(false, true) &&
                          TLI.getBooleanContents(false, false) ==
                              TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Result type differs from the condition type: xor in the condition
    // type, then extend or truncate to the result type.
    XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorklist(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath &&
        VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      SDValue FMinMax =
          combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
                              N1, N2, CC, TLI, DAG);
      if (FMinMax)
        return FMinMax;
    }

    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  return SDValue();
}

/// Split a vector SETCC node into two half-width SETCCs over the split
/// operands, returning the (lo, hi) pair.
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the
  // inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  // Re-emit the compare on each half, keeping the original condition code
  // (operand 2).
  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
4803 ConstantSDNode *BottomHalf = nullptr; 4804 for (int i = 0; i < NumElems / 2; ++i) { 4805 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4806 continue; 4807 4808 if (BottomHalf == nullptr) 4809 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4810 else if (Cond->getOperand(i).getNode() != BottomHalf) 4811 return SDValue(); 4812 } 4813 4814 // Do the same for the second half of the BuildVector 4815 ConstantSDNode *TopHalf = nullptr; 4816 for (int i = NumElems / 2; i < NumElems; ++i) { 4817 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4818 continue; 4819 4820 if (TopHalf == nullptr) 4821 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4822 else if (Cond->getOperand(i).getNode() != TopHalf) 4823 return SDValue(); 4824 } 4825 4826 assert(TopHalf && BottomHalf && 4827 "One half of the selector was all UNDEFs and the other was all the " 4828 "same value. This should have been addressed before this function."); 4829 return DAG.getNode( 4830 ISD::CONCAT_VECTORS, dl, VT, 4831 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), 4832 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); 4833} 4834 4835SDValue DAGCombiner::visitMSTORE(SDNode *N) { 4836 4837 if (Level >= AfterLegalizeTypes) 4838 return SDValue(); 4839 4840 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 4841 SDValue Mask = MST->getMask(); 4842 SDValue Data = MST->getValue(); 4843 SDLoc DL(N); 4844 4845 // If the MSTORE data type requires splitting and the mask is provided by a 4846 // SETCC, then split both nodes and its operands before legalization. This 4847 // prevents the type legalizer from unrolling SETCC into scalar comparisons 4848 // and enables future optimizations (e.g. min/max pattern matching on X86). 4849 if (Mask.getOpcode() == ISD::SETCC) { 4850 4851 // Check if any splitting is required. 
4852 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 4853 TargetLowering::TypeSplitVector) 4854 return SDValue(); 4855 4856 SDValue MaskLo, MaskHi, Lo, Hi; 4857 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 4858 4859 EVT LoVT, HiVT; 4860 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); 4861 4862 SDValue Chain = MST->getChain(); 4863 SDValue Ptr = MST->getBasePtr(); 4864 4865 EVT MemoryVT = MST->getMemoryVT(); 4866 unsigned Alignment = MST->getOriginalAlignment(); 4867 4868 // if Alignment is equal to the vector size, 4869 // take the half of it for the second part 4870 unsigned SecondHalfAlignment = 4871 (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 4872 Alignment/2 : Alignment; 4873 4874 EVT LoMemVT, HiMemVT; 4875 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 4876 4877 SDValue DataLo, DataHi; 4878 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 4879 4880 MachineMemOperand *MMO = DAG.getMachineFunction(). 4881 getMachineMemOperand(MST->getPointerInfo(), 4882 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 4883 Alignment, MST->getAAInfo(), MST->getRanges()); 4884 4885 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 4886 MST->isTruncatingStore()); 4887 4888 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 4889 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 4890 DAG.getConstant(IncrementSize, Ptr.getValueType())); 4891 4892 MMO = DAG.getMachineFunction(). 
4893 getMachineMemOperand(MST->getPointerInfo(), 4894 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), 4895 SecondHalfAlignment, MST->getAAInfo(), 4896 MST->getRanges()); 4897 4898 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 4899 MST->isTruncatingStore()); 4900 4901 AddToWorklist(Lo.getNode()); 4902 AddToWorklist(Hi.getNode()); 4903 4904 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 4905 } 4906 return SDValue(); 4907} 4908 4909SDValue DAGCombiner::visitMLOAD(SDNode *N) { 4910 4911 if (Level >= AfterLegalizeTypes) 4912 return SDValue(); 4913 4914 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 4915 SDValue Mask = MLD->getMask(); 4916 SDLoc DL(N); 4917 4918 // If the MLOAD result requires splitting and the mask is provided by a 4919 // SETCC, then split both nodes and its operands before legalization. This 4920 // prevents the type legalizer from unrolling SETCC into scalar comparisons 4921 // and enables future optimizations (e.g. min/max pattern matching on X86). 4922 4923 if (Mask.getOpcode() == ISD::SETCC) { 4924 EVT VT = N->getValueType(0); 4925 4926 // Check if any splitting is required. 4927 if (TLI.getTypeAction(*DAG.getContext(), VT) != 4928 TargetLowering::TypeSplitVector) 4929 return SDValue(); 4930 4931 SDValue MaskLo, MaskHi, Lo, Hi; 4932 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 4933 4934 SDValue Src0 = MLD->getSrc0(); 4935 SDValue Src0Lo, Src0Hi; 4936 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 4937 4938 EVT LoVT, HiVT; 4939 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 4940 4941 SDValue Chain = MLD->getChain(); 4942 SDValue Ptr = MLD->getBasePtr(); 4943 EVT MemoryVT = MLD->getMemoryVT(); 4944 unsigned Alignment = MLD->getOriginalAlignment(); 4945 4946 // if Alignment is equal to the vector size, 4947 // take the half of it for the second part 4948 unsigned SecondHalfAlignment = 4949 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
4950 Alignment/2 : Alignment; 4951 4952 EVT LoMemVT, HiMemVT; 4953 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 4954 4955 MachineMemOperand *MMO = DAG.getMachineFunction(). 4956 getMachineMemOperand(MLD->getPointerInfo(), 4957 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 4958 Alignment, MLD->getAAInfo(), MLD->getRanges()); 4959 4960 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, 4961 ISD::NON_EXTLOAD); 4962 4963 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 4964 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 4965 DAG.getConstant(IncrementSize, Ptr.getValueType())); 4966 4967 MMO = DAG.getMachineFunction(). 4968 getMachineMemOperand(MLD->getPointerInfo(), 4969 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), 4970 SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); 4971 4972 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, 4973 ISD::NON_EXTLOAD); 4974 4975 AddToWorklist(Lo.getNode()); 4976 AddToWorklist(Hi.getNode()); 4977 4978 // Build a factor node to remember that this load is independent of the 4979 // other one. 4980 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 4981 Hi.getValue(1)); 4982 4983 // Legalized the chain result - switch anything that used the old chain to 4984 // use the new one. 4985 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); 4986 4987 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 4988 4989 SDValue RetOps[] = { LoadRes, Chain }; 4990 return DAG.getMergeValues(RetOps, DL); 4991 } 4992 return SDValue(); 4993} 4994 4995SDValue DAGCombiner::visitVSELECT(SDNode *N) { 4996 SDValue N0 = N->getOperand(0); 4997 SDValue N1 = N->getOperand(1); 4998 SDValue N2 = N->getOperand(2); 4999 SDLoc DL(N); 5000 5001 // Canonicalize integer abs. 
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match either "X >= 0 ? X : 0-X" or "X > -1 ? X : 0-X"...
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // ...or the mirrored "X <= 0 ? 0-X : X" form.
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // abs(X) == (X + (X >>s w-1)) ^ (X >>s w-1), where the arithmetic
      // shift replicates the sign bit across the whole element.
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SDValue CV = ConvertSelectToConcatVector(N, DAG);
    if (CV.getNode())
      return CV;
  }

  return SDValue();
}

// visitSELECT_CC - Combine a SELECT_CC node: fold away trivial selects and
// simplify the embedded comparison via SimplifySetCC/SimplifySelectCC.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, SDLoc(N), false);
  if (SCC.getNode()) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->getOpcode() == ISD::UNDEF) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with the DAG creation: no setcc node is created in this case.
    return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.
5119 5120 // fold select_cc into other things, such as min/max/abs 5121 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 5122} 5123 5124SDValue DAGCombiner::visitSETCC(SDNode *N) { 5125 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 5126 cast<CondCodeSDNode>(N->getOperand(2))->get(), 5127 SDLoc(N)); 5128} 5129 5130// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext 5131// dag node into a ConstantSDNode or a build_vector of constants. 5132// This function is called by the DAGCombiner when visiting sext/zext/aext 5133// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 5134// Vector extends are not folded if operations are legal; this is to 5135// avoid introducing illegal build_vector dag nodes. 5136static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 5137 SelectionDAG &DAG, bool LegalTypes, 5138 bool LegalOperations) { 5139 unsigned Opcode = N->getOpcode(); 5140 SDValue N0 = N->getOperand(0); 5141 EVT VT = N->getValueType(0); 5142 5143 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 5144 Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); 5145 5146 // fold (sext c1) -> c1 5147 // fold (zext c1) -> c1 5148 // fold (aext c1) -> c1 5149 if (isa<ConstantSDNode>(N0)) 5150 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); 5151 5152 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 5153 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 5154 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 5155 EVT SVT = VT.getScalarType(); 5156 if (!(VT.isVector() && 5157 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && 5158 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 5159 return nullptr; 5160 5161 // We can fold this node into a build_vector. 
5162 unsigned VTBits = SVT.getSizeInBits(); 5163 unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); 5164 unsigned ShAmt = VTBits - EVTBits; 5165 SmallVector<SDValue, 8> Elts; 5166 unsigned NumElts = N0->getNumOperands(); 5167 SDLoc DL(N); 5168 5169 for (unsigned i=0; i != NumElts; ++i) { 5170 SDValue Op = N0->getOperand(i); 5171 if (Op->getOpcode() == ISD::UNDEF) { 5172 Elts.push_back(DAG.getUNDEF(SVT)); 5173 continue; 5174 } 5175 5176 ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); 5177 const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); 5178 if (Opcode == ISD::SIGN_EXTEND) 5179 Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), 5180 SVT)); 5181 else 5182 Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), 5183 SVT)); 5184 } 5185 5186 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); 5187} 5188 5189// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 5190// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 5191// transformation. Returns true if extension are possible and the above 5192// mentioned transformation is profitable. 5193static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 5194 unsigned ExtOpc, 5195 SmallVectorImpl<SDNode *> &ExtendNodes, 5196 const TargetLowering &TLI) { 5197 bool HasCopyToRegUses = false; 5198 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 5199 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 5200 UE = N0.getNode()->use_end(); 5201 UI != UE; ++UI) { 5202 SDNode *User = *UI; 5203 if (User == N) 5204 continue; 5205 if (UI.getUse().getResNo() != N0.getResNo()) 5206 continue; 5207 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
5208 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 5209 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 5210 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 5211 // Sign bits will be lost after a zext. 5212 return false; 5213 bool Add = false; 5214 for (unsigned i = 0; i != 2; ++i) { 5215 SDValue UseOp = User->getOperand(i); 5216 if (UseOp == N0) 5217 continue; 5218 if (!isa<ConstantSDNode>(UseOp)) 5219 return false; 5220 Add = true; 5221 } 5222 if (Add) 5223 ExtendNodes.push_back(User); 5224 continue; 5225 } 5226 // If truncates aren't free and there are users we can't 5227 // extend, it isn't worthwhile. 5228 if (!isTruncFree) 5229 return false; 5230 // Remember if this value is live-out. 5231 if (User->getOpcode() == ISD::CopyToReg) 5232 HasCopyToRegUses = true; 5233 } 5234 5235 if (HasCopyToRegUses) { 5236 bool BothLiveOut = false; 5237 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5238 UI != UE; ++UI) { 5239 SDUse &Use = UI.getUse(); 5240 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 5241 BothLiveOut = true; 5242 break; 5243 } 5244 } 5245 if (BothLiveOut) 5246 // Both unextended and extended values are live out. There had better be 5247 // a good reason for the transformation. 5248 return ExtendNodes.size(); 5249 } 5250 return true; 5251} 5252 5253void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 5254 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 5255 ISD::NodeType ExtType) { 5256 // Extend SetCC uses if necessary. 
  // Rewrite each recorded SETCC so it compares the extended values, replacing
  // the operand that was the old (truncated) load result with ExtLoad.
  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
    SDNode *SetCC = SetCCs[i];
    SmallVector<SDValue, 4> Ops;

    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == Trunc)
        Ops.push_back(ExtLoad);
      else
        // ExtendUsesToFormExtLoad only recorded SETCCs whose other operand is
        // a constant, so extending it here is always possible.
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}

// visitSIGN_EXTEND - Combine an ISD::SIGN_EXTEND node: fold constants,
// collapse extend chains, merge with truncates/loads/setcc where profitable.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // None of the supported targets knows how to perform load and sign extend
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // extended too (or rewritten, for SETCC users).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the old load see a truncate of the extended load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the constant operand to the destination width.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
        TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;

    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
                                     N0.getOperand(0), N0.getOperand(1), CC);
        return DAG.getSelect(DL, VT, SetCC,
                             NegOne, DAG.getConstant(0, VT));
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero in KnownZero.
// This function computes KnownZero to avoid a duplicated call to
// computeKnownBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         APInt &KnownZero) {
  APInt KnownOne;
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.computeKnownBits(Op, KnownZero, KnownOne);
    return true;
  }

  // Also treat (setne X, 0) / (setne 0, X) on i1 as a "truncate" of X: the
  // i1 result equals X's low bit when all other bits of X are known zero.
  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
  ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
  if (COp0 && COp0->isNullValue())
    Op = Op1;
  else if (COp1 && COp1->isNullValue())
    Op = Op0;
  else
    return false;

  DAG.computeKnownBits(Op, KnownZero, KnownOne);

  // Require every bit except (possibly) the lowest one to be known zero.
  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
    return false;

  return true;
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT
         = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
    // TruncatedBits = the bits of Op above N0's width that the extended
    // result must see as zero (capped at the destination width).
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);

      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {

    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // Bring the truncate's input to the destination width, then mask off
    // everything above the truncated width.
    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
      AddToWorklist(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // extended too (or rewritten, for SETCC users).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the old load see a truncate of the extended load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Zero-extend the constant operand to the destination width.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N0VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N0VT) == N0.getValueType())
        return SDValue();

      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
                                       OneOps));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits = width added by the inner zext, i.e. the number of
      // known-zero high bits a left shift may safely consume.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

// visitANY_EXTEND - Combine an ISD::ANY_EXTEND node: fold constants,
// collapse extend/truncate chains, and merge with loads where legal.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // extended too (or rewritten, for SETCC users).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Remaining users of the old load see a truncate of the extended load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5903 } 5904 } 5905 5906 if (N0.getOpcode() == ISD::SETCC) { 5907 // For vectors: 5908 // aext(setcc) -> vsetcc 5909 // aext(setcc) -> truncate(vsetcc) 5910 // aext(setcc) -> aext(vsetcc) 5911 // Only do this before legalize for now. 5912 if (VT.isVector() && !LegalOperations) { 5913 EVT N0VT = N0.getOperand(0).getValueType(); 5914 // We know that the # elements of the results is the same as the 5915 // # elements of the compare (and the # elements of the compare result 5916 // for that matter). Check to see that they are the same size. If so, 5917 // we know that the element size of the sext'd result matches the 5918 // element size of the compare operands. 5919 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 5920 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), 5921 N0.getOperand(1), 5922 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 5923 // If the desired elements are smaller or larger than the source 5924 // elements we can use a matching integer vector type and then 5925 // truncate/any extend 5926 else { 5927 EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); 5928 SDValue VsetCC = 5929 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), 5930 N0.getOperand(1), 5931 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 5932 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); 5933 } 5934 } 5935 5936 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 5937 SDValue SCC = 5938 SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), 5939 DAG.getConstant(1, VT), DAG.getConstant(0, VT), 5940 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 5941 if (SCC.getNode()) 5942 return SCC; 5943 } 5944 5945 return SDValue(); 5946} 5947 5948/// See if the specified operand can be simplified with the knowledge that only 5949/// the bits specified by Mask are used. If so, return the simpler operand, 5950/// otherwise return a null SDValue. 
5951SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 5952 switch (V.getOpcode()) { 5953 default: break; 5954 case ISD::Constant: { 5955 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 5956 assert(CV && "Const value should be ConstSDNode."); 5957 const APInt &CVal = CV->getAPIntValue(); 5958 APInt NewVal = CVal & Mask; 5959 if (NewVal != CVal) 5960 return DAG.getConstant(NewVal, V.getValueType()); 5961 break; 5962 } 5963 case ISD::OR: 5964 case ISD::XOR: 5965 // If the LHS or RHS don't contribute bits to the or, drop them. 5966 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 5967 return V.getOperand(1); 5968 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 5969 return V.getOperand(0); 5970 break; 5971 case ISD::SRL: 5972 // Only look at single-use SRLs. 5973 if (!V.getNode()->hasOneUse()) 5974 break; 5975 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 5976 // See if we can recursively simplify the LHS. 5977 unsigned Amt = RHSC->getZExtValue(); 5978 5979 // Watch out for shift count overflow though. 5980 if (Amt >= Mask.getBitWidth()) break; 5981 APInt NewMask = Mask << Amt; 5982 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 5983 if (SimplifyLHS.getNode()) 5984 return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), 5985 SimplifyLHS, V.getOperand(1)); 5986 } 5987 } 5988 return SDValue(); 5989} 5990 5991/// If the result of a wider load is shifted to right of N bits and then 5992/// truncated to a narrower type and where N is a multiple of number of bits of 5993/// the narrower type, transform it to a narrower load from address + N / num of 5994/// bits of new type. If the result is to be extended, also fold the extension 5995/// to form a extending load. 
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    // Treat N itself as the value whose width is being reduced; the narrow
    // type holds the bits that survive the shift.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        // Look through the shift to the (hopefully) loaded value.
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  // Let the target veto the narrowing (e.g. if the narrow load is slower).
  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (TLI.isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Compute the new address and the (possibly reduced) alignment at that
  // byte offset.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, PtrType));
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load =  DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                        LN0->getPointerInfo().getWithOffset(PtrOff),
                        LN0->isVolatile(), LN0->isNonTemporal(),
                        LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, VT);
    else
      Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           Result, DAG.getConstant(ShLeftAmt, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

/// Try to simplify an ISD::SIGN_EXTEND_INREG node, returning a replacement
/// value or a null SDValue if no fold applies.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: this local named 'EVT' shadows the EVT type; every later mention
  // of 'EVT' inside this function refers to this value (the type being
  // sign-extended from), not the class.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1  (getNode constant-folds this form)
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
  // into a build_vector.
  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SmallVector<SDValue, 8> Elts;
    unsigned NumElts = N0->getNumOperands();
    unsigned ShAmt = VTBits - EVTBits;

    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Op = N0->getOperand(i);
      if (Op->getOpcode() == ISD::UNDEF) {
        Elts.push_back(Op);
        continue;
      }

      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
      // Sign-extend the element by shifting the narrow value up to the top
      // and arithmetic-shifting it back down.
      // NOTE(review): getZExtValue() asserts for constants wider than 64
      // bits -- this assumes vector element constants fit in 64 bits; confirm.
      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
                                     Op.getValueType()));
    }

    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
  }

  return SDValue();
}

/// Try to simplify an ISD::TRUNCATE node, returning a replacement value or a
/// null SDValue if no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = TLI.isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1  (getNode constant-folds this form)
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      // if the source is smaller than the dest, we still need an extend
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                         N0.getOperand(0));
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      // if the source is larger than the dest, than we just need the truncate
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Reinterpret the vector with SizeRatio-times more (narrower) elements.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy();
      // The low (truncated) part sits at a different sub-element index
      // depending on endianness.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
                              NVT, N0.getOperand(0));

      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                         SDLoc(N), TrTy, V,
                         DAG.getConstant(Index, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      // Keep every TruncEltOffset-th build_vector operand (the low parts).
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    SDValue Shorter =
      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                               VT.getSizeInBits()));
    if (Shorter.getNode())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }
  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    SDValue Reduced = ReduceLoadWidth(N);
    if (Reduced.getNode())
      return Reduced;
    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        // The truncated type is still wider than the memory type, so the
        // load stays an extending load, just to the narrower result.
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (X.getOpcode() != ISD::UNDEF) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      // Record the narrowed type of each concat operand.
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Return operand i of N, looking through an ISD::MERGE_VALUES wrapper to
/// the node that actually defines the value.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);

  if (ISD::isNON_EXTLoad(LD2) &&
      LD2->hasOneUse() &&
      // If both are volatile this would reduce the number of volatile loads.
      // If one is volatile it might be ok, but play conservative and bail out.
      !LD1->isVolatile() &&
      !LD2->isVolatile() &&
      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
    unsigned Align = LD1->getAlignment();
    // The merged (wider) load must not require stricter alignment than the
    // first load already provides.
    unsigned NewAlign = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getPointerInfo(),
                         false, false, false, Align);
  }

  return SDValue();
}

/// Try to simplify an ISD::BITCAST node, returning a replacement value or a
/// null SDValue if no fold applies.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
6533 // First check to see if this is all constant. 6534 if (!LegalTypes && 6535 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 6536 VT.isVector()) { 6537 bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); 6538 6539 EVT DestEltVT = N->getValueType(0).getVectorElementType(); 6540 assert(!DestEltVT.isVector() && 6541 "Element type of vector ValueType must not be vector!"); 6542 if (isSimple) 6543 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); 6544 } 6545 6546 // If the input is a constant, let getNode fold it. 6547 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 6548 // If we can't allow illegal operations, we need to check that this is just 6549 // a fp -> int or int -> conversion and that the resulting operation will 6550 // be legal. 6551 if (!LegalOperations || 6552 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && 6553 TLI.isOperationLegal(ISD::ConstantFP, VT)) || 6554 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && 6555 TLI.isOperationLegal(ISD::Constant, VT))) 6556 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); 6557 } 6558 6559 // (conv (conv x, t1), t2) -> (conv x, t2) 6560 if (N0.getOpcode() == ISD::BITCAST) 6561 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, 6562 N0.getOperand(0)); 6563 6564 // fold (conv (load x)) -> (load (conv*)x) 6565 // If the resultant load doesn't need a higher alignment than the original! 6566 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 6567 // Do not change the width of a volatile load. 6568 !cast<LoadSDNode>(N0)->isVolatile() && 6569 // Do not remove the cast if the types differ in endian layout. 
6570 TLI.hasBigEndianPartOrdering(N0.getValueType()) == 6571 TLI.hasBigEndianPartOrdering(VT) && 6572 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && 6573 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { 6574 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6575 unsigned Align = TLI.getDataLayout()-> 6576 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 6577 unsigned OrigAlign = LN0->getAlignment(); 6578 6579 if (Align <= OrigAlign) { 6580 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), 6581 LN0->getBasePtr(), LN0->getPointerInfo(), 6582 LN0->isVolatile(), LN0->isNonTemporal(), 6583 LN0->isInvariant(), OrigAlign, 6584 LN0->getAAInfo()); 6585 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 6586 return Load; 6587 } 6588 } 6589 6590 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 6591 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 6592 // This often reduces constant pool loads. 6593 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || 6594 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && 6595 N0.getNode()->hasOneUse() && VT.isInteger() && 6596 !VT.isVector() && !N0.getValueType().isVector()) { 6597 SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, 6598 N0.getOperand(0)); 6599 AddToWorklist(NewConv.getNode()); 6600 6601 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 6602 if (N0.getOpcode() == ISD::FNEG) 6603 return DAG.getNode(ISD::XOR, SDLoc(N), VT, 6604 NewConv, DAG.getConstant(SignBit, VT)); 6605 assert(N0.getOpcode() == ISD::FABS); 6606 return DAG.getNode(ISD::AND, SDLoc(N), VT, 6607 NewConv, DAG.getConstant(~SignBit, VT)); 6608 } 6609 6610 // fold (bitconvert (fcopysign cst, x)) -> 6611 // (or (and (bitconvert x), sign), (and cst, (not sign))) 6612 // Note that we don't handle (copysign x, cst) because this can always be 6613 // folded to an fneg or fabs. 
6614 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 6615 isa<ConstantFPSDNode>(N0.getOperand(0)) && 6616 VT.isInteger() && !VT.isVector()) { 6617 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 6618 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 6619 if (isTypeLegal(IntXVT)) { 6620 SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), 6621 IntXVT, N0.getOperand(1)); 6622 AddToWorklist(X.getNode()); 6623 6624 // If X has a different width than the result/lhs, sext it or truncate it. 6625 unsigned VTWidth = VT.getSizeInBits(); 6626 if (OrigXWidth < VTWidth) { 6627 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); 6628 AddToWorklist(X.getNode()); 6629 } else if (OrigXWidth > VTWidth) { 6630 // To get the sign bit in the right place, we have to shift it right 6631 // before truncating. 6632 X = DAG.getNode(ISD::SRL, SDLoc(X), 6633 X.getValueType(), X, 6634 DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); 6635 AddToWorklist(X.getNode()); 6636 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 6637 AddToWorklist(X.getNode()); 6638 } 6639 6640 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 6641 X = DAG.getNode(ISD::AND, SDLoc(X), VT, 6642 X, DAG.getConstant(SignBit, VT)); 6643 AddToWorklist(X.getNode()); 6644 6645 SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), 6646 VT, N0.getOperand(0)); 6647 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, 6648 Cst, DAG.getConstant(~SignBit, VT)); 6649 AddToWorklist(Cst.getNode()); 6650 6651 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); 6652 } 6653 } 6654 6655 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
  if (N0.getOpcode() == ISD::BUILD_PAIR) {
    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
    if (CombineLD.getNode())
      return CombineLD;
  }

  return SDValue();
}

SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  // A BUILD_PAIR of two consecutive loads can be replaced by one wider load;
  // CombineConsecutiveLoads returns a null SDValue when that is not possible.
  return CombineConsecutiveLoads(N, VT);
}

/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands.  DstEltVT indicates the destination element value type.
/// The fold proceeds by cases: same-width elements are bitcast one-by-one;
/// FP source/destination elements are first routed through an integer vector
/// of the same width (recursively); then integer elements are either packed
/// (growing) or split (shrinking) with endian-correct ordering.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                     DstEltVT, BV->getOperand(0)));

    // Bitcast each element individually.
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      SDValue Op = BV->getOperand(i);
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = TLI.isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first input element is the
        // least-significant chunk, so walk the inputs in reverse.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // An output element is undef only if every contributing input was undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  bool isS2V = ISD::isScalarToVector(BV);
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
      // One undef input expands to NumOutputsPerInput undef outputs.
      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      // If the source was a scalar_to_vector whose scalar fits entirely in
      // the first (and so far only) output chunk, the remaining chunks would
      // all be zero padding:
      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, Ops[0]);
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (TLI.isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}

SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fadd c1, c2) -> c1 + c2
  // (getNode performs the actual constant folding when both are constants.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // isNegatibleForFree == 2 means the negated form is strictly cheaper.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd A, 0) -> A
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N0;

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isa<ConstantFPSDNode>(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     N0.getOperand(1), N1));

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        // The constant must be on the fmul's RHS only (CFP01 && !CFP00),
        // otherwise x would itself be a constant.
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP01, 0),
                                       DAG.getConstantFP(1.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP01, 0),
                                       DAG.getConstantFP(2.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N0.getOperand(0), NewCFP);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP11, 0),
                                       DAG.getConstantFP(1.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       SDValue(CFP11, 0),
                                       DAG.getConstantFP(2.0, VT));
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1))
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N1, DAG.getConstantFP(3.0, VT));
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0)
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                             N0, DAG.getConstantFP(3.0, VT));
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0.getOperand(0), DAG.getConstantFP(4.0, VT));
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
    // Without aggressive fusion the fmul must be single-use, or it would
    // survive alongside the new fma.
    if (N0.getOpcode() == ISD::FMUL &&
        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);

    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FMUL &&
        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N1.getOperand(0), N1.getOperand(1), N0);

    // When FP_EXTEND nodes are free on the target, and there is an opportunity
    // to combine into FMA, arrange such nodes accordingly.
    if (TLI.isFPExtFree(VT)) {

      // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(1)), N1);
      }

      // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
      // Note: Commutes FADD operands.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(1)), N0);
      }
    }

    // More folding opportunities when target permits.
    if (TLI.enableAggressiveFMAFusion(VT)) {

      // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
      if (N0.getOpcode() == ISD::FMA &&
          N0.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N0.getOperand(2).getOperand(0),
                                       N0.getOperand(2).getOperand(1),
                                       N1));

      // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
      if (N1->getOpcode() == ISD::FMA &&
          N1.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N1.getOperand(0), N1.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N1.getOperand(2).getOperand(0),
                                       N1.getOperand(2).getOperand(1),
                                       N0));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fsub c1, c2) -> c1-c2
  // (getNode constant-folds when both operands are constants.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, dl, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
    // (fsub A, 0) -> A
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N0;

    // (fsub 0, B) -> -B
    if (N0CFP && N0CFP->getValueAPF().isZero()) {
      // Prefer a free negation of B; otherwise emit an explicit FNEG if legal.
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, dl, VT, N1);
    }

    // (fsub x, x) -> 0.0
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, VT);

    // (fsub x, (fadd x, y)) -> (fneg y)
    // (fsub x, (fadd y, x)) -> (fneg y)
    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (N0.getOpcode() == ISD::FMUL &&
        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, dl, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(ISD::FNEG, dl, VT, N1));

    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FMUL &&
        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1), N0);

    // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
    // Both the fneg and the fmul must be single-use (unless fusing
    // aggressively) so no duplicate computation is left behind.
    if (N0.getOpcode() == ISD::FNEG &&
        N0.getOperand(0).getOpcode() == ISD::FMUL &&
        ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
         TLI.enableAggressiveFMAFusion(VT))) {
      SDValue N00 = N0.getOperand(0).getOperand(0);
      SDValue N01 = N0.getOperand(0).getOperand(1);
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
                         DAG.getNode(ISD::FNEG, dl, VT, N1));
    }

    // When FP_EXTEND nodes are free on the target, and there is an opportunity
    // to combine into FMA, arrange such nodes accordingly.
    if (TLI.isFPExtFree(VT)) {

      // fold (fsub (fpext (fmul x, y)), z)
      //   -> (fma (fpext x), (fpext y), (fneg z))
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N00.getOperand(1)),
                             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
      }

      // fold (fsub x, (fpext (fmul y, z)))
      //   -> (fma (fneg (fpext y)), (fpext z), x)
      // Note: Commutes FSUB operands.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == ISD::FMUL)
          return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                             DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                         DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                     VT, N10.getOperand(0))),
                             DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                         N10.getOperand(1)),
                             N0);
      }

      // fold (fsub (fpext (fneg (fmul, x, y))), z)
      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FNEG) {
          SDValue N000 = N00.getOperand(0);
          if (N000.getOpcode() == ISD::FMUL) {
            return DAG.getNode(ISD::FMA, dl, VT,
                               DAG.getNode(ISD::FNEG, dl, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                       VT, N000.getOperand(0))),
                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                           N000.getOperand(1)),
                               DAG.getNode(ISD::FNEG, dl, VT, N1));
          }
        }
      }

      // fold (fsub (fneg (fpext (fmul, x, y))), z)
      //   -> (fma (fneg (fpext x)), (fpext y), (fneg z))
      // Same as above with the fneg/fpext nesting reversed.
      if (N0.getOpcode() == ISD::FNEG) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == ISD::FP_EXTEND) {
          SDValue N000 = N00.getOperand(0);
          if (N000.getOpcode() == ISD::FMUL) {
            return DAG.getNode(ISD::FMA, dl, VT,
                               DAG.getNode(ISD::FNEG, dl, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
                                                       VT, N000.getOperand(0))),
                               DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
                                           N000.getOperand(1)),
                               DAG.getNode(ISD::FNEG, dl, VT, N1));
          }
        }
      }
    }

    // More folding opportunities when target permits.
    // The aggressive-fusion folds below rearrange an FMA whose addend is an
    // FMUL into nested FMAs.
    if (TLI.enableAggressiveFMAFusion(VT)) {

      // fold (fsub (fma x, y, (fmul u, v)), z)
      //   -> (fma x, y (fma u, v, (fneg z)))
      if (N0.getOpcode() == ISD::FMA &&
          N0.getOperand(2).getOpcode() == ISD::FMUL)
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       N0.getOperand(2).getOperand(0),
                                       N0.getOperand(2).getOperand(1),
                                       DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                                   N1)));

      // fold (fsub x, (fma y, z, (fmul u, v)))
      //   -> (fma (fneg y), z, (fma (fneg u), v, x))
      if (N1.getOpcode() == ISD::FMA &&
          N1.getOperand(2).getOpcode() == ISD::FMUL) {
        SDValue N20 = N1.getOperand(2).getOperand(0);
        SDValue N21 = N1.getOperand(2).getOperand(1);
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                       N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(ISD::FMA, SDLoc(N), VT,
                                       DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                                   N20),
                                       N21, N0));
      }
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode())
      return FoldedVOp;
    // Canonicalize vector constant to RHS.
    if (N0.getOpcode() == ISD::BUILD_VECTOR &&
        N1.getOpcode() != ISD::BUILD_VECTOR)
      if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
        if (BV0->isConstant())
          return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    // (Unsafe: ignores that A might be NaN/Inf, where A*0 is NaN.)
    if (N1CFP && N1CFP->getValueAPF().isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
      if ((N1CFP && isConstOrConstSplatFP(N01)) ||
          (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
        SDLoc SL(N);
        SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
        return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
      SDLoc SL(N);
      const SDValue Two = DAG.getConstantFP(2.0, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // Constant fold FMA.
  // getNode() performs the actual fold when all three operands are constants.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
  }

  if (Options.UnsafeFPMath) {
    // (fma 0, y, z) -> z and (fma x, 0, z) -> z; unsafe because the product
    // could have been NaN.
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1, y, z) -> (fadd y, z); (fma x, 1, z) -> (fadd x, z)
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FMUL &&
      N0 == N2.getOperand(0) &&
      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
  }


  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
  if (Options.UnsafeFPMath &&
      N0.getOpcode() == ISD::FMUL && N1CFP &&
      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMA, dl, VT,
                       N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
                       N2);
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
    }
  }

  // (fma x, c, x) -> (fmul x, (c+1))
  if (Options.UnsafeFPMath && N1CFP && N0 == N2)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(1.0, VT)));

  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(-1.0, VT)));


  return SDValue();
}

SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      APFloat N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
                           DAG.getConstantFP(Recip, VT));
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpext(sqrt(y)) -> x * fpext(rsqrt(y))
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpround(sqrt(y)) -> x * fpround(rsqrt(y))
      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal.
  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
  // Notice that this is not always beneficial. One reason is different target
  // may have different costs for FDIV and FMUL, so sometimes the cost of two
  // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
  // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
  if (Options.UnsafeFPMath) {
    // Skip if current node is a reciprocal.
    if (N0CFP && N0CFP->isExactlyValue(1.0))
      return SDValue();

    SmallVector<SDNode *, 4> Users;
    // Find all FDIV users of the same divisor.
    for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
                              UE = N1.getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = UI.getUse().getUser();
      if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
        Users.push_back(User);
    }

    // Let the target decide whether sharing one reciprocal among this many
    // divides pays off.
    if (TLI.combineRepeatedFPDivisors(Users.size())) {
      SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
      SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);

      // Dividend / Divisor -> Dividend * Reciprocal
      for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
        if ((*I)->getOperand(0) != FPOne) {
          SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
                                        (*I)->getOperand(0), Reciprocal);
          DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
        }
      }
      // All users (including N) were rewritten in place via RAUW above, so
      // there is no single replacement value to hand back.
      return SDValue();
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  // (getNode constant-folds the remainder when both operands are constants.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  if (DAG.getTarget().Options.UnsafeFPMath &&
      !TLI.isFsqrtCheap()) {
    // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
    if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
      EVT VT = RV.getValueType();
      RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
      AddToWorklist(RV.getNode());

      // Unfortunately, RV is now NaN if
the input was exactly 0. 7546 // Select out this case and force the answer to 0. 7547 SDValue Zero = DAG.getConstantFP(0.0, VT); 7548 SDValue ZeroCmp = 7549 DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), 7550 N->getOperand(0), Zero, ISD::SETEQ); 7551 AddToWorklist(ZeroCmp.getNode()); 7552 AddToWorklist(RV.getNode()); 7553 7554 RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, 7555 SDLoc(N), VT, ZeroCmp, Zero, RV); 7556 return RV; 7557 } 7558 } 7559 return SDValue(); 7560} 7561 7562SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { 7563 SDValue N0 = N->getOperand(0); 7564 SDValue N1 = N->getOperand(1); 7565 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7566 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 7567 EVT VT = N->getValueType(0); 7568 7569 if (N0CFP && N1CFP) // Constant fold 7570 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); 7571 7572 if (N1CFP) { 7573 const APFloat& V = N1CFP->getValueAPF(); 7574 // copysign(x, c1) -> fabs(x) iff ispos(c1) 7575 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) 7576 if (!V.isNegative()) { 7577 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) 7578 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 7579 } else { 7580 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 7581 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, 7582 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); 7583 } 7584 } 7585 7586 // copysign(fabs(x), y) -> copysign(x, y) 7587 // copysign(fneg(x), y) -> copysign(x, y) 7588 // copysign(copysign(x,z), y) -> copysign(x, y) 7589 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || 7590 N0.getOpcode() == ISD::FCOPYSIGN) 7591 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7592 N0.getOperand(0), N1); 7593 7594 // copysign(x, abs(y)) -> abs(x) 7595 if (N1.getOpcode() == ISD::FABS) 7596 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 7597 7598 // copysign(x, copysign(y,z)) -> copysign(x, z) 7599 if (N1.getOpcode() == 
ISD::FCOPYSIGN) 7600 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7601 N0, N1.getOperand(1)); 7602 7603 // copysign(x, fp_extend(y)) -> copysign(x, y) 7604 // copysign(x, fp_round(y)) -> copysign(x, y) 7605 if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) 7606 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7607 N0, N1.getOperand(0)); 7608 7609 return SDValue(); 7610} 7611 7612SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { 7613 SDValue N0 = N->getOperand(0); 7614 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 7615 EVT VT = N->getValueType(0); 7616 EVT OpVT = N0.getValueType(); 7617 7618 // fold (sint_to_fp c1) -> c1fp 7619 if (N0C && 7620 // ...but only if the target supports immediate floating-point values 7621 (!LegalOperations || 7622 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 7623 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 7624 7625 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 7626 // but UINT_TO_FP is legal on this target, try to convert. 7627 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && 7628 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { 7629 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 7630 if (DAG.SignBitIsZero(N0)) 7631 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 7632 } 7633 7634 // The next optimizations are desirable only if SELECT_CC can be lowered. 
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    // (a signed i1 setcc result is 0 or -1, hence -1.0 for the true value).
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, VT), DAG.getConstantFP(0.0, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    // (the zext makes the true value +1, so the FP true value is 1.0).
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }
  }

  return SDValue();
}

/// Fold UINT_TO_FP nodes: constant-fold, convert to SINT_TO_FP when the
/// sign bit is known zero, and turn conversions of setcc results into
/// select_cc of FP constants.
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (N0C &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0,, cc)

    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
    }
  }

  return SDValue();
}

/// Fold FP_TO_SINT nodes.  Only constant folding is attempted; getNode
/// evaluates the conversion when the operand is a constant.
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return SDValue();
}

/// Fold FP_TO_UINT nodes.  Only constant folding is attempted.
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return SDValue();
}

/// Fold FP_ROUND nodes.  Operand 1 is the "truncating" flag: 1 means the
/// round is known not to change the value.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // This is a value-preserving truncation if both rounds are.
    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
                   N0.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
                       DAG.getIntPtrConstant(IsTrunc));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding the magnitude first lets the copysign be applied in the
  // narrower type.  Only done when the copysign has a single use.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// Fold FP_ROUND_INREG nodes.  Only constant folding is attempted: the
/// constant is rebuilt in the in-register type and extended back out.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately shadows the EVT type name; it holds the
  // in-register rounding type carried by operand 1.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
  }

  return SDValue();
}

/// Fold FP_EXTEND nodes: constant-fold, look through value-preserving
/// rounds, and turn fpext(load) into an extending load.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be
  // folded by the fp_round combine instead.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    // Extending to something narrower than the round's input is still a
    // round; otherwise extend the pre-round value directly.
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other users of the original load get a round of the extended load;
    // the value-preserving flag (1) marks it as exact.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}

/// Fold FCEIL nodes.  Only constant folding is attempted.
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

/// Fold FTRUNC nodes.  Only constant folding is attempted.
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  return SDValue();
}

/// Fold FFLOOR nodes.  Only constant folding is attempted.
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}

// FIXME: FNEG and FABS have a lot in common; refactor.
/// Fold FNEG nodes: constant-fold, use free negations, rewrite
/// fneg(bitcast x) as an integer sign-bit flip, and push the negation into
/// a multiply by a constant.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // Constant fold FNEG.
  if (isa<ConstantFPSDNode>(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));

  // If the operand can be negated at no extra cost, do so directly.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
                        DAG.getConstant(SignMask, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only if the negated constant is still
      // cheap to materialize on this target.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
    }
  }

  return SDValue();
}

/// Fold FMINNUM nodes: constant-fold via minnum(), and canonicalize a
/// constant LHS to the RHS.
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Fold FMAXNUM nodes: constant-fold via maxnum(), and canonicalize a
/// constant LHS to the RHS.
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Fold FABS nodes: constant-fold, strip sign-only wrappers, and rewrite
/// fabs(bitcast x) as an integer sign-bit clear.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fabs c1) -> fabs(c1)
  if (isa<ConstantFPSDNode>(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
      }
      Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
                        DAG.getConstant(SignMask, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
    }
  }

  return SDValue();
}

/// Fold BRCOND nodes: form BR_CC from a setcc condition, simplify
/// shifted-AND conditions into a setcc, and turn xor conditions into
/// explicit comparisons.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDValue SetCC =
            DAG.getSetCC(SDLoc(N),
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      SDValue Tmp = visitXOR(TheXor);
      if (Tmp.getNode()) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // A constant-1 LHS means this xor is the boolean-not of an inner xor,
      // so the branch tests equality instead of inequality.
      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
            Op0.getOpcode() == ISD::XOR) {
          TheXor = Op0.getNode();
          Equal = true;
        }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
/// Fold BR_CC nodes by simplifying the embedded comparison with
/// SimplifySetCC and rebuilding the branch around the simpler setcc.
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}

/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
8189static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 8190 SelectionDAG &DAG, 8191 const TargetLowering &TLI) { 8192 EVT VT; 8193 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 8194 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 8195 return false; 8196 VT = Use->getValueType(0); 8197 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 8198 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 8199 return false; 8200 VT = ST->getValue().getValueType(); 8201 } else 8202 return false; 8203 8204 TargetLowering::AddrMode AM; 8205 if (N->getOpcode() == ISD::ADD) { 8206 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8207 if (Offset) 8208 // [reg +/- imm] 8209 AM.BaseOffs = Offset->getSExtValue(); 8210 else 8211 // [reg +/- reg] 8212 AM.Scale = 1; 8213 } else if (N->getOpcode() == ISD::SUB) { 8214 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8215 if (Offset) 8216 // [reg +/- imm] 8217 AM.BaseOffs = -Offset->getSExtValue(); 8218 else 8219 // [reg +/- reg] 8220 AM.Scale = 1; 8221 } else 8222 return false; 8223 8224 return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); 8225} 8226 8227/// Try turning a load/store into a pre-indexed load/store when the base 8228/// pointer is an add or subtract and it has other uses besides the load/store. 8229/// After the transformation, the new indexed load/store has effectively folded 8230/// the add/subtract in and all of its other uses are redirected to the 8231/// new load/store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Pre-indexed forms are only introduced after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  // N must be a plain load or store whose memory VT has a legal pre-inc or
  // pre-dec form on this target; grab its base pointer.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses,
  // bail out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack
  // pointer (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode *Use : BasePtr.getNode()->uses()) {
      if (Use == Ptr.getNode())
        continue;

      // A use that feeds into N would create a cycle if rewritten; skip it.
      if (Use->isPredecessorOf(N))
        continue;

      // Any non-ADD/SUB use disqualifies the whole rewrite; give up on
      // folding other uses entirely.
      if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
      if (Op1.getNode() == BasePtr.getNode())
        std::swap(Op0, Op1);
      assert(Op0.getNode() == BasePtr.getNode() &&
             "Use of ADD/SUB but not an operand");

      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use);
    }

  // Undo the earlier canonicalization swap before querying uses of Ptr.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // All checks passed: build the indexed form of the memory operation.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Redirect the value (and, for loads, the chain) results of N to the new
  // indexed node.  An indexed load produces (value, new base, chain), so
  // the chain is result #2; an indexed store produces (new base, chain).
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
8392 deleteAndRecombine(N); 8393 8394 if (Swapped) 8395 std::swap(BasePtr, Offset); 8396 8397 // Replace other uses of BasePtr that can be updated to use Ptr 8398 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { 8399 unsigned OffsetIdx = 1; 8400 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) 8401 OffsetIdx = 0; 8402 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == 8403 BasePtr.getNode() && "Expected BasePtr operand"); 8404 8405 // We need to replace ptr0 in the following expression: 8406 // x0 * offset0 + y0 * ptr0 = t0 8407 // knowing that 8408 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) 8409 // 8410 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the 8411 // indexed load/store and the expresion that needs to be re-written. 8412 // 8413 // Therefore, we have: 8414 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 8415 8416 ConstantSDNode *CN = 8417 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); 8418 int X0, X1, Y0, Y1; 8419 APInt Offset0 = CN->getAPIntValue(); 8420 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); 8421 8422 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; 8423 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; 8424 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; 8425 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; 8426 8427 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; 8428 8429 APInt CNV = Offset0; 8430 if (X0 < 0) CNV = -CNV; 8431 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; 8432 else CNV = CNV - Offset1; 8433 8434 // We can now generate the new expression. 8435 SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); 8436 SDValue NewOp2 = Result.getValue(isLoad ? 
                                    1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 SDLoc(OtherUses[i]),
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}

/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folds the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to
/// the new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Post-indexed forms are only formed after the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer's only use is this load/store there is no add/sub to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // A load produces (value, new-base, chain); a store produces
        // (new-base, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// \brief Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  // Operand 1 is the base pointer, operand 2 is the increment.
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
                          ConstInc->getValueType(0));
  }

  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          // Keep the pointer update as a free-standing add/sub.
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad =
               DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(), Align, LD->getAAInfo());
        return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // The -combiner-alias-analysis flag overrides the subtarget default.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various cost.
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A free zext does not count against the slice.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
                              LS.Inst->getOperand(0).getValueType()))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  LoadedSlice(const LoadedSlice &LS)
      : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // NOTE(review): MinAlign(Alignment, Alignment + Offset) looks intended to
    // derive the alignment of base+Offset from the base alignment — confirm
    // against MinAlign's semantics.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (Origin->getOffset().getOpcode() != ISD::UNDEF)
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian =
        DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the byte offset counts from the other end.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
                              DAG->getConstant(Offset, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst = DAG->getLoad(
        SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
        Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
        Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
}

/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// \brief Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  // The union of both slices must itself be a dense run of bits.
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}

/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {

    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    // A paired load counts as one load, so drop one from the global cost.
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}

/// \brief Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there is exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // Under -combiner-stress-load-slicing, slice whenever possible.
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  // Check (1).
  if (NumberOfSlices != 2)
    return false;

  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;

  // Check (3).
9169 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); 9170 // The original code has one big load. 9171 OrigCost.Loads = 1; 9172 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { 9173 const LoadedSlice &LS = LoadedSlices[CurrSlice]; 9174 // Accumulate the cost of all the slices. 9175 LoadedSlice::Cost SliceCost(LS, ForCodeSize); 9176 GlobalSlicingCost += SliceCost; 9177 9178 // Account as cost in the original configuration the gain obtained 9179 // with the current slices. 9180 OrigCost.addSliceGain(LS); 9181 } 9182 9183 // If the target supports paired load, adjust the cost accordingly. 9184 adjustCostForPairing(LoadedSlices, GlobalSlicingCost); 9185 return OrigCost > GlobalSlicingCost; 9186} 9187 9188/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) 9189/// operations, split it in the various pieces being extracted. 9190/// 9191/// This sort of thing is introduced by SROA. 9192/// This slicing takes care not to insert overlapping loads. 9193/// \pre LI is a simple load (i.e., not an atomic or volatile load). 9194bool DAGCombiner::SliceUpLoad(SDNode *N) { 9195 if (Level < AfterLegalizeDAG) 9196 return false; 9197 9198 LoadSDNode *LD = cast<LoadSDNode>(N); 9199 if (LD->isVolatile() || !ISD::isNormalLoad(LD) || 9200 !LD->getValueType(0).isInteger()) 9201 return false; 9202 9203 // Keep track of already used bits to detect overlapping values. 9204 // In that case, we will just abort the transformation. 9205 APInt UsedBits(LD->getValueSizeInBits(0), 0); 9206 9207 SmallVector<LoadedSlice, 4> LoadedSlices; 9208 9209 // Check if this load is used as several smaller chunks of bits. 9210 // Basically, look for uses in trunc or trunc(lshr) and record a new chain 9211 // of computation for each trunc. 9212 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 9213 UI != UIEnd; ++UI) { 9214 // Skip the uses of the chain. 
9215 if (UI.getUse().getResNo() != 0) 9216 continue; 9217 9218 SDNode *User = *UI; 9219 unsigned Shift = 0; 9220 9221 // Check if this is a trunc(lshr). 9222 if (User->getOpcode() == ISD::SRL && User->hasOneUse() && 9223 isa<ConstantSDNode>(User->getOperand(1))) { 9224 Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); 9225 User = *User->use_begin(); 9226 } 9227 9228 // At this point, User is a Truncate, iff we encountered, trunc or 9229 // trunc(lshr). 9230 if (User->getOpcode() != ISD::TRUNCATE) 9231 return false; 9232 9233 // The width of the type must be a power of 2 and greater than 8-bits. 9234 // Otherwise the load cannot be represented in LLVM IR. 9235 // Moreover, if we shifted with a non-8-bits multiple, the slice 9236 // will be across several bytes. We do not support that. 9237 unsigned Width = User->getValueSizeInBits(0); 9238 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) 9239 return 0; 9240 9241 // Build the slice for this chain of computations. 9242 LoadedSlice LS(User, LD, Shift, &DAG); 9243 APInt CurrentUsedBits = LS.getUsedBits(); 9244 9245 // Check if this slice overlaps with another. 9246 if ((CurrentUsedBits & UsedBits) != 0) 9247 return false; 9248 // Update the bits used globally. 9249 UsedBits |= CurrentUsedBits; 9250 9251 // Check if the new slice would be legal. 9252 if (!LS.isLegal()) 9253 return false; 9254 9255 // Record the slice. 9256 LoadedSlices.push_back(LS); 9257 } 9258 9259 // Abort slicing if it does not seem to be profitable. 9260 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) 9261 return false; 9262 9263 ++SlicedLoads; 9264 9265 // Rewrite each chain to use an independent load. 9266 // By construction, each chain can be represented by a unique load. 9267 9268 // Prepare the argument for the new token factor for all the slices. 
  SmallVector<SDValue, 8> ArgChains;
  // Rewrite each recorded slice: CombineTo replaces the original (extended)
  // use with the narrow slice, and we collect every slice's chain result so
  // they can be tied together below.
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice may have wrapped the raw load in a single extension node;
    // peel it off to reach the load so we can take its chain (value #1).
    if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Merge all the slice chains into one TokenFactor and use it to replace
  // the chain result of the original load, keeping chain users correct.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  return true;
}

/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
/// Returns (0, 0) when the pattern does not match; callers test .first.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for: (and (load Ptr), constant).
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    // Scan the TokenFactor operands for the load.
    bool isOk = false;
    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
      if (Chain->getOperand(i).getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}


/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo.  If so, replace the specified store with a narrower store of
/// truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  // MaskInfo is (number of bytes masked, byte offset of the masked region),
  // as produced by CheckForMaskedLoad.
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift)
    IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8,
                                    DC->getShiftAmountTy(IVal.getValueType())));

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On big-endian targets the narrow bytes sit at the opposite end of the
  // value, so the byte offset must be computed from the top.
  if (DAG.getTargetLoweringInfo().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}


/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates.  If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // Only handle a plain load whose sole user is this op and whose chain feeds
  // the store directly; address spaces of the load and store must match.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the immediate so that set bits mark the bits actually
    // being changed by the operation (AND clears where the mask is 0).
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
             TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed when every changed bit fits inside the NewBW window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the earlier inversion for AND so the stored immediate is correct.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (TLI.isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrowed load / op / store sequence.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
9550SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 9551 StoreSDNode *ST = cast<StoreSDNode>(N); 9552 SDValue Chain = ST->getChain(); 9553 SDValue Value = ST->getValue(); 9554 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 9555 Value.hasOneUse() && 9556 Chain == SDValue(Value.getNode(), 1)) { 9557 LoadSDNode *LD = cast<LoadSDNode>(Value); 9558 EVT VT = LD->getMemoryVT(); 9559 if (!VT.isFloatingPoint() || 9560 VT != ST->getMemoryVT() || 9561 LD->isNonTemporal() || 9562 ST->isNonTemporal() || 9563 LD->getPointerInfo().getAddrSpace() != 0 || 9564 ST->getPointerInfo().getAddrSpace() != 0) 9565 return SDValue(); 9566 9567 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 9568 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 9569 !TLI.isOperationLegal(ISD::STORE, IntVT) || 9570 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 9571 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 9572 return SDValue(); 9573 9574 unsigned LDAlign = LD->getAlignment(); 9575 unsigned STAlign = ST->getAlignment(); 9576 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 9577 unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); 9578 if (LDAlign < ABIAlign || STAlign < ABIAlign) 9579 return SDValue(); 9580 9581 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), 9582 LD->getChain(), LD->getBasePtr(), 9583 LD->getPointerInfo(), 9584 false, false, false, LDAlign); 9585 9586 SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), 9587 NewLD, ST->getBasePtr(), 9588 ST->getPointerInfo(), 9589 false, false, STAlign); 9590 9591 AddToWorklist(NewLD.getNode()); 9592 AddToWorklist(NewST.getNode()); 9593 WorklistRemover DeadNodes(*this); 9594 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 9595 ++LdStFP2Int; 9596 return NewST; 9597 } 9598 9599 return SDValue(); 9600} 9601 9602/// Helper struct to parse and store a memory address as base + index + offset. 
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (add (i8 load %index)
///                                     (i8 1))))
/// vs
///
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
///                                         (i32 1)))))
struct BaseIndexOffset {
  SDValue Base;        // Common base operand of the address.
  SDValue Index;       // Optional variable index; null when not matched.
  int64_t Offset;      // Constant byte offset from Base (+ Index).
  bool IsIndexSignExt; // True when Index was wrapped in a sign_extend.

  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}

  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                  bool IsIndexSignExt) :
    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}

  // Two addresses are comparable by constant offset only when base, index,
  // and the index's sign-extension status are all identical.
  bool equalBaseIndex(const BaseIndexOffset &Other) {
    return Other.Base == Base && Other.Index == Index &&
           Other.IsIndexSignExt == IsIndexSignExt;
  }

  /// Parses tree in Ptr for base, index, offset addresses.
  static BaseIndexOffset match(SDValue Ptr) {
    bool IsIndexSignExt = false;

    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
    // instruction, then it could be just the BASE or everything else we don't
    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // We know that we have at least an ADD instruction. Try to pattern match
    // the simple case of BASE + OFFSET.
    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
      return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                             IsIndexSignExt);
    }

    // Inside a loop the current BASE pointer is calculated using an ADD and a
    // MUL instruction. In this case Ptr is the actual BASE pointer.
    // (i64 add (i64 %array_ptr)
    //          (i64 mul (i64 %induction_var)
    //                   (i64 %element_size)))
    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Look at Base + Index + Offset cases.
    SDValue Base = Ptr->getOperand(0);
    SDValue IndexOffset = Ptr->getOperand(1);

    // Skip signextends.
    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
      IndexOffset = IndexOffset->getOperand(0);
      IsIndexSignExt = true;
    }

    // Either the case of Base + Index (no offset) or something else.
    if (IndexOffset->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);

    // Now we have the case of Base + Index + offset.
    SDValue Index = IndexOffset->getOperand(0);
    SDValue Offset = IndexOffset->getOperand(1);

    // A non-constant offset means we cannot fold the address; fall back to
    // treating the whole expression as the base.
    if (!isa<ConstantSDNode>(Offset))
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Ignore signextends.
    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
      Index = Index->getOperand(0);
      IsIndexSignExt = true;
    } else IsIndexSignExt = false;

    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  }
};

/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
    MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
  // Ptr to the mem node.
  LSBaseSDNode *MemNode;
  // Offset from the base ptr.
  int64_t OffsetFromBase;
  // What is the sequence number of this mem node.
  // Lowest mem operand in the DAG starts at zero.
9701 unsigned SequenceNum; 9702}; 9703 9704bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { 9705 EVT MemVT = St->getMemoryVT(); 9706 int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; 9707 bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). 9708 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); 9709 9710 // Don't merge vectors into wider inputs. 9711 if (MemVT.isVector() || !MemVT.isSimple()) 9712 return false; 9713 9714 // Perform an early exit check. Do not bother looking at stored values that 9715 // are not constants or loads. 9716 SDValue StoredVal = St->getValue(); 9717 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 9718 if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && 9719 !IsLoadSrc) 9720 return false; 9721 9722 // Only look at ends of store sequences. 9723 SDValue Chain = SDValue(St, 0); 9724 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) 9725 return false; 9726 9727 // This holds the base pointer, index, and the offset in bytes from the base 9728 // pointer. 9729 BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); 9730 9731 // We must have a base and an offset. 9732 if (!BasePtr.Base.getNode()) 9733 return false; 9734 9735 // Do not handle stores to undef base pointers. 9736 if (BasePtr.Base.getOpcode() == ISD::UNDEF) 9737 return false; 9738 9739 // Save the LoadSDNodes that we find in the chain. 9740 // We need to make sure that these nodes do not interfere with 9741 // any of the store nodes. 9742 SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; 9743 9744 // Save the StoreSDNodes that we find in the chain. 9745 SmallVector<MemOpLink, 8> StoreNodes; 9746 9747 // Walk up the chain and look for nodes with offsets from the same 9748 // base pointer. Stop when reaching an instruction with a different kind 9749 // or instruction which has a different base pointer. 
9750 unsigned Seq = 0; 9751 StoreSDNode *Index = St; 9752 while (Index) { 9753 // If the chain has more than one use, then we can't reorder the mem ops. 9754 if (Index != St && !SDValue(Index, 0)->hasOneUse()) 9755 break; 9756 9757 // Find the base pointer and offset for this memory node. 9758 BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); 9759 9760 // Check that the base pointer is the same as the original one. 9761 if (!Ptr.equalBaseIndex(BasePtr)) 9762 break; 9763 9764 // Check that the alignment is the same. 9765 if (Index->getAlignment() != St->getAlignment()) 9766 break; 9767 9768 // The memory operands must not be volatile. 9769 if (Index->isVolatile() || Index->isIndexed()) 9770 break; 9771 9772 // No truncation. 9773 if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) 9774 if (St->isTruncatingStore()) 9775 break; 9776 9777 // The stored memory type must be the same. 9778 if (Index->getMemoryVT() != MemVT) 9779 break; 9780 9781 // We do not allow unaligned stores because we want to prevent overriding 9782 // stores. 9783 if (Index->getAlignment()*8 != MemVT.getSizeInBits()) 9784 break; 9785 9786 // We found a potential memory operand to merge. 9787 StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); 9788 9789 // Find the next memory operand in the chain. If the next operand in the 9790 // chain is a store then move up and continue the scan with the next 9791 // memory operand. If the next operand is a load save it and use alias 9792 // information to check if it interferes with anything. 9793 SDNode *NextInChain = Index->getChain().getNode(); 9794 while (1) { 9795 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { 9796 // We found a store node. Use it for the next iteration. 9797 Index = STn; 9798 break; 9799 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { 9800 if (Ldn->isVolatile()) { 9801 Index = nullptr; 9802 break; 9803 } 9804 9805 // Save the load node for later. Continue the scan. 
9806 AliasLoadNodes.push_back(Ldn); 9807 NextInChain = Ldn->getChain().getNode(); 9808 continue; 9809 } else { 9810 Index = nullptr; 9811 break; 9812 } 9813 } 9814 } 9815 9816 // Check if there is anything to merge. 9817 if (StoreNodes.size() < 2) 9818 return false; 9819 9820 // Sort the memory operands according to their distance from the base pointer. 9821 std::sort(StoreNodes.begin(), StoreNodes.end(), 9822 [](MemOpLink LHS, MemOpLink RHS) { 9823 return LHS.OffsetFromBase < RHS.OffsetFromBase || 9824 (LHS.OffsetFromBase == RHS.OffsetFromBase && 9825 LHS.SequenceNum > RHS.SequenceNum); 9826 }); 9827 9828 // Scan the memory operations on the chain and find the first non-consecutive 9829 // store memory address. 9830 unsigned LastConsecutiveStore = 0; 9831 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 9832 for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { 9833 9834 // Check that the addresses are consecutive starting from the second 9835 // element in the list of stores. 9836 if (i > 0) { 9837 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 9838 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 9839 break; 9840 } 9841 9842 bool Alias = false; 9843 // Check if this store interferes with any of the loads that we found. 9844 for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) 9845 if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { 9846 Alias = true; 9847 break; 9848 } 9849 // We found a load that alias with this store. Stop the sequence. 9850 if (Alias) 9851 break; 9852 9853 // Mark this node as useful. 9854 LastConsecutiveStore = i; 9855 } 9856 9857 // The node with the lowest store address. 9858 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 9859 9860 // Store the constants into memory as one consecutive store. 
9861 if (!IsLoadSrc) { 9862 unsigned LastLegalType = 0; 9863 unsigned LastLegalVectorType = 0; 9864 bool NonZero = false; 9865 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 9866 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9867 SDValue StoredVal = St->getValue(); 9868 9869 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { 9870 NonZero |= !C->isNullValue(); 9871 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { 9872 NonZero |= !C->getConstantFPValue()->isNullValue(); 9873 } else { 9874 // Non-constant. 9875 break; 9876 } 9877 9878 // Find a legal type for the constant store. 9879 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 9880 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9881 if (TLI.isTypeLegal(StoreTy)) 9882 LastLegalType = i+1; 9883 // Or check whether a truncstore is legal. 9884 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 9885 TargetLowering::TypePromoteInteger) { 9886 EVT LegalizedStoredValueTy = 9887 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); 9888 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) 9889 LastLegalType = i+1; 9890 } 9891 9892 // Find a legal type for the vector store. 9893 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 9894 if (TLI.isTypeLegal(Ty)) 9895 LastLegalVectorType = i + 1; 9896 } 9897 9898 // We only use vectors if the constant is known to be zero and the 9899 // function is not marked with the noimplicitfloat attribute. 9900 if (NonZero || NoVectors) 9901 LastLegalVectorType = 0; 9902 9903 // Check if we found a legal integer type to store. 9904 if (LastLegalType == 0 && LastLegalVectorType == 0) 9905 return false; 9906 9907 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; 9908 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; 9909 9910 // Make sure we have something to merge. 
9911 if (NumElem < 2) 9912 return false; 9913 9914 unsigned EarliestNodeUsed = 0; 9915 for (unsigned i=0; i < NumElem; ++i) { 9916 // Find a chain for the new wide-store operand. Notice that some 9917 // of the store nodes that we found may not be selected for inclusion 9918 // in the wide store. The chain we use needs to be the chain of the 9919 // earliest store node which is *used* and replaced by the wide store. 9920 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 9921 EarliestNodeUsed = i; 9922 } 9923 9924 // The earliest Node in the DAG. 9925 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 9926 SDLoc DL(StoreNodes[0].MemNode); 9927 9928 SDValue StoredVal; 9929 if (UseVector) { 9930 // Find a legal type for the vector store. 9931 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 9932 assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); 9933 StoredVal = DAG.getConstant(0, Ty); 9934 } else { 9935 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 9936 APInt StoreInt(StoreBW, 0); 9937 9938 // Construct a single integer constant which is made of the smaller 9939 // constant inputs. 9940 bool IsLE = TLI.isLittleEndian(); 9941 for (unsigned i = 0; i < NumElem ; ++i) { 9942 unsigned Idx = IsLE ?(NumElem - 1 - i) : i; 9943 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); 9944 SDValue Val = St->getValue(); 9945 StoreInt<<=ElementSizeBytes*8; 9946 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { 9947 StoreInt|=C->getAPIntValue().zext(StoreBW); 9948 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { 9949 StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); 9950 } else { 9951 llvm_unreachable("Invalid constant element type"); 9952 } 9953 } 9954 9955 // Create the new Load and Store operations. 
9956 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9957 StoredVal = DAG.getConstant(StoreInt, StoreTy); 9958 } 9959 9960 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, 9961 FirstInChain->getBasePtr(), 9962 FirstInChain->getPointerInfo(), 9963 false, false, 9964 FirstInChain->getAlignment()); 9965 9966 // Replace the first store with the new store 9967 CombineTo(EarliestOp, NewStore); 9968 // Erase all other stores. 9969 for (unsigned i = 0; i < NumElem ; ++i) { 9970 if (StoreNodes[i].MemNode == EarliestOp) 9971 continue; 9972 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9973 // ReplaceAllUsesWith will replace all uses that existed when it was 9974 // called, but graph optimizations may cause new ones to appear. For 9975 // example, the case in pr14333 looks like 9976 // 9977 // St's chain -> St -> another store -> X 9978 // 9979 // And the only difference from St to the other store is the chain. 9980 // When we change it's chain to be St's chain they become identical, 9981 // get CSEed and the net result is that X is now a use of St. 9982 // Since we know that St is redundant, just iterate. 9983 while (!St->use_empty()) 9984 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); 9985 deleteAndRecombine(St); 9986 } 9987 9988 return true; 9989 } 9990 9991 // Below we handle the case of multiple consecutive stores that 9992 // come from multiple consecutive loads. We merge them into a single 9993 // wide load and a single wide store. 9994 9995 // Look for load nodes which are used by the stored values. 9996 SmallVector<MemOpLink, 8> LoadNodes; 9997 9998 // Find acceptable loads. Loads need to have the same chain (token factor), 9999 // must not be zext, volatile, indexed, and they must be consecutive. 
10000 BaseIndexOffset LdBasePtr; 10001 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 10002 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 10003 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 10004 if (!Ld) break; 10005 10006 // Loads must only have one use. 10007 if (!Ld->hasNUsesOfValue(1, 0)) 10008 break; 10009 10010 // Check that the alignment is the same as the stores. 10011 if (Ld->getAlignment() != St->getAlignment()) 10012 break; 10013 10014 // The memory operands must not be volatile. 10015 if (Ld->isVolatile() || Ld->isIndexed()) 10016 break; 10017 10018 // We do not accept ext loads. 10019 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 10020 break; 10021 10022 // The stored memory type must be the same. 10023 if (Ld->getMemoryVT() != MemVT) 10024 break; 10025 10026 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); 10027 // If this is not the first ptr that we check. 10028 if (LdBasePtr.Base.getNode()) { 10029 // The base ptr must be the same. 10030 if (!LdPtr.equalBaseIndex(LdBasePtr)) 10031 break; 10032 } else { 10033 // Check that all other base pointers are the same as this one. 10034 LdBasePtr = LdPtr; 10035 } 10036 10037 // We found a potential memory operand to merge. 10038 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); 10039 } 10040 10041 if (LoadNodes.size() < 2) 10042 return false; 10043 10044 // If we have load/store pair instructions and we only have two values, 10045 // don't bother. 10046 unsigned RequiredAlignment; 10047 if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && 10048 St->getAlignment() >= RequiredAlignment) 10049 return false; 10050 10051 // Scan the memory operations on the chain and find the first non-consecutive 10052 // load memory address. These variables hold the index in the store node 10053 // array. 10054 unsigned LastConsecutiveLoad = 0; 10055 // This variable refers to the size and not index in the array. 
10056 unsigned LastLegalVectorType = 0; 10057 unsigned LastLegalIntegerType = 0; 10058 StartAddress = LoadNodes[0].OffsetFromBase; 10059 SDValue FirstChain = LoadNodes[0].MemNode->getChain(); 10060 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 10061 // All loads much share the same chain. 10062 if (LoadNodes[i].MemNode->getChain() != FirstChain) 10063 break; 10064 10065 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 10066 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 10067 break; 10068 LastConsecutiveLoad = i; 10069 10070 // Find a legal type for the vector store. 10071 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 10072 if (TLI.isTypeLegal(StoreTy)) 10073 LastLegalVectorType = i + 1; 10074 10075 // Find a legal type for the integer store. 10076 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 10077 StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 10078 if (TLI.isTypeLegal(StoreTy)) 10079 LastLegalIntegerType = i + 1; 10080 // Or check whether a truncstore and extload is legal. 10081 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 10082 TargetLowering::TypePromoteInteger) { 10083 EVT LegalizedStoredValueTy = 10084 TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); 10085 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 10086 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && 10087 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && 10088 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) 10089 LastLegalIntegerType = i+1; 10090 } 10091 } 10092 10093 // Only use vector types if the vector type is larger than the integer type. 10094 // If they are the same, use integers. 
10095 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; 10096 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 10097 10098 // We add +1 here because the LastXXX variables refer to location while 10099 // the NumElem refers to array/index size. 10100 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 10101 NumElem = std::min(LastLegalType, NumElem); 10102 10103 if (NumElem < 2) 10104 return false; 10105 10106 // The earliest Node in the DAG. 10107 unsigned EarliestNodeUsed = 0; 10108 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 10109 for (unsigned i=1; i<NumElem; ++i) { 10110 // Find a chain for the new wide-store operand. Notice that some 10111 // of the store nodes that we found may not be selected for inclusion 10112 // in the wide store. The chain we use needs to be the chain of the 10113 // earliest store node which is *used* and replaced by the wide store. 10114 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 10115 EarliestNodeUsed = i; 10116 } 10117 10118 // Find if it is better to use vectors or integers to load and store 10119 // to memory. 
10120 EVT JointMemOpVT; 10121 if (UseVectorTy) { 10122 JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 10123 } else { 10124 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 10125 JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 10126 } 10127 10128 SDLoc LoadDL(LoadNodes[0].MemNode); 10129 SDLoc StoreDL(StoreNodes[0].MemNode); 10130 10131 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 10132 SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, 10133 FirstLoad->getChain(), 10134 FirstLoad->getBasePtr(), 10135 FirstLoad->getPointerInfo(), 10136 false, false, false, 10137 FirstLoad->getAlignment()); 10138 10139 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, 10140 FirstInChain->getBasePtr(), 10141 FirstInChain->getPointerInfo(), false, false, 10142 FirstInChain->getAlignment()); 10143 10144 // Replace one of the loads with the new load. 10145 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 10146 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 10147 SDValue(NewLoad.getNode(), 1)); 10148 10149 // Remove the rest of the load chains. 10150 for (unsigned i = 1; i < NumElem ; ++i) { 10151 // Replace all chain users of the old load nodes with the chain of the new 10152 // load node. 10153 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 10154 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); 10155 } 10156 10157 // Replace the first store with the new store. 10158 CombineTo(EarliestOp, NewStore); 10159 // Erase all other stores. 10160 for (unsigned i = 0; i < NumElem ; ++i) { 10161 // Remove all Store nodes. 
    // Skip the store that was chosen as the merge target; CombineTo above
    // already replaced it with the new wide store.
    if (StoreNodes[i].MemNode == EarliestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // Route this store's chain users directly to its input chain, then
    // delete the now-dead store node.
    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }

  return true;
}

/// Main combine hook for STORE nodes.  Tries, in order: rewriting a store of
/// a bitcast as a store of the pre-cast value, deleting stores of undef,
/// rewriting FP-constant stores as integer stores, improving the inferred
/// alignment, FP<->integer load/store pairing, alias-analysis based chain
/// improvement, pre/post-indexed forms, demanded-bits narrowing of
/// truncating stores, dead-store elimination, folding FP_ROUND/TRUNCATE into
/// a truncating store, merging consecutive stores (before type legalization
/// only), and finally ReduceLoadOpStoreWidth.  Returns the replacement
/// value, SDValue(N, 0) if N was updated in place, or a null SDValue if no
/// change was made.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    unsigned OrigAlign = ST->getAlignment();
    EVT SVT = Value.getOperand(0).getValueType();
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
    if (Align <= OrigAlign &&
        ((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                          ST->isNonTemporal(), OrigAlign,
                          ST->getAAInfo());
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
    return Chain;

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
    // NOTE: If the original store is volatile, this transform must not increase
    // the number of stores.  For example, on x86-32 an f64 can be stored in one
    // processor operation but an i64 (which is not legal) requires two.  So the
    // transform should not be done in this case.
    if (Value.getOpcode() != ISD::TargetConstantFP) {
      SDValue Tmp;
      switch (CFP->getSimpleValueType(0).SimpleTy) {
      default: llvm_unreachable("Unknown FP type");
      case MVT::f16:    // We don't do this for these yet.
      case MVT::f80:
      case MVT::f128:
      case MVT::ppcf128:
        break;
      case MVT::f32:
        // f32 -> i32 store of the raw bit pattern, when i32 stores are usable.
        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), MVT::i32);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }
        break;
      case MVT::f64:
        // Prefer a single i64 store of the bit pattern when i64 is usable.
        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
             !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), MVT::i64);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }

        if (!ST->isVolatile() &&
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Many FP stores are not made apparent until after legalize, e.g. for
          // argument passing.  Since this is so common, custom legalize the
          // 64-bit integer store into two 32-bit stores.
          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
          // Lo/Hi name the memory order for little-endian; swap for BE.
          if (TLI.isBigEndian()) std::swap(Lo, Hi);

          unsigned Alignment = ST->getAlignment();
          bool isVolatile = ST->isVolatile();
          bool isNonTemporal = ST->isNonTemporal();
          AAMDNodes AAInfo = ST->getAAInfo();

          // Both half-stores hang off the original Chain and are joined by a
          // TokenFactor below, so neither is ordered after the other.
          SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
                                     Ptr, ST->getPointerInfo(),
                                     isVolatile, isNonTemporal,
                                     ST->getAlignment(), AAInfo);
          Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
                            DAG.getConstant(4, Ptr.getValueType()));
          // The high half is only 4-byte offset aligned at best.
          Alignment = MinAlign(Alignment, 4U);
          SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
                                     Ptr, ST->getPointerInfo().getWithOffset(4),
                                     isVolatile, isNonTemporal,
                                     Alignment, AAInfo);
          return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                             St0, St1);
        }

        break;
      }
    }
  }

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment())
        return DAG.getTruncStore(Chain, SDLoc(N), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align,
                                 ST->getAAInfo());
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  SDValue NewST = TransformFPLoadStorePair(N);
  if (NewST.getNode())
    return NewST;

  // The -combiner-alias-analysis flag overrides the subtarget default.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplStore;

      // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
        ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
                                      ST->getMemoryVT(), ST->getMemOperand());
      } else {
        ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
                                 ST->getMemOperand());
      }

      // Create token to keep both nodes around.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplStore);

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Don't add users to work list.
      return CombineTo(N, Token, false);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                        APInt::getLowBitsSet(
                          Value.getValueType().getScalarType().getSizeInBits(),
                          ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is a store followed by a store with the same value to the same
  // location, then the store is dead/noop.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
        ST1->isUnindexed() && !ST1->isVolatile()) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
      // Stop if a merge deleted ST itself.
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  return ReduceLoadOpStoreWidth(N);
}

/// Combine INSERT_VECTOR_ELT nodes.  Drops inserts of undef, canonicalizes
/// chains of inserts by constant index, and folds an insert into a
/// BUILD_VECTOR (or undef) operand by rebuilding the vector with the new
/// element in place.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc dl(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.getOpcode() == ISD::UNDEF)
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt =
      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.getOpcode() == ISD::UNDEF) {
    // Treat an undef vector as a BUILD_VECTOR of all-undef elements.
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
                DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}

/// Replace EVE (an EXTRACT_VECTOR_ELT of OriginalLoad, whose vector type is
/// InVecVT) with a narrow scalar load of just the addressed element.  EltNo
/// may be a constant (the byte offset is folded into the pointer info) or
/// variable (the offset is computed with MUL/SUB nodes in the DAG).  Returns
/// a null SDValue if the alignment or load legality checks fail; on success
/// it performs the use replacement itself and returns SDValue(EVE, 0).
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail out if the element load would need more alignment than the original
  // load provides, or if scalar loads of this type aren't available.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: compute the byte offset at compile time.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    if (TLI.isBigEndian())
      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
    Offset = DAG.getConstant(PtrOff, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: offset = EltNo * element store size (flipped for BE).
    Offset = DAG.getNode(
        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
    if (TLI.isBigEndian())
      Offset = DAG.getNode(
          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(
        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same-size or narrower result: load the element and truncate/bitcast
    // it to the extract's result type.
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Replace both the extract's value and the old load's chain in one step.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  // The combiner recognizes SDValue(EVE, 0) as "node updated in place".
  return SDValue(EVE, 0);
}

/// Combine EXTRACT_VECTOR_ELT nodes.  Folds extracts of SCALAR_TO_VECTOR to
/// the scalar, looks through VECTOR_SHUFFLE and BITCAST operands to find the
/// underlying element, and (with a constant or, pre-legalization, variable
/// index) narrows an extract of a single-use load to a scalar load via
/// ReplaceExtractVectorEltOfLoadWithNarrowedLoad.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  bool ConstEltNo = isa<ConstantSDNode>(EltNo);

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
      && ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(Elt);

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    // Mask values >= NumElem refer to the shuffle's second operand.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations) {
      EVT IndexTy = TLI.getVectorIdxTy();
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
                         SVInVec, DAG.getConstant(OrigElt, IndexTy));
    }
  }

  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    // Look through the bitcast; track the pre-cast element type.
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // The hasPredecessor check prevents a cycle: the index must not depend on
  // the load being replaced.
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                           OrigLoad);
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Translate the shuffle index into an index within the chosen input.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
//
// If every defined operand of the BUILD_VECTOR is an ANY_EXTEND or
// ZERO_EXTEND from one common source type, build a wider BUILD_VECTOR of the
// un-extended values (padding with undef or zero as appropriate) and bitcast
// it to the original vector type.  Runs only between type legalization and
// operation legalization; returns a null SDValue when the pattern or the
// resulting type doesn't work out.
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.getOpcode() == ISD::UNDEF) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = TLI.isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Padding elements: undef if only ANY_EXTENDs were seen, zero otherwise
  // (ZERO_EXTEND requires the high elements to be zero).
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // On little-endian the source value occupies the lowest of the ElemRatio
    // sub-elements; on big-endian the highest.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

/// If every defined operand of this BUILD_VECTOR is a [SU]INT_TO_FP from the
/// same integer type, convert it to a single [SU]INT_TO_FP applied to a
/// BUILD_VECTOR of the integer operands.  Returns a null SDValue when the
/// operands mix opcodes or source types, fewer than two elements are
/// defined, or the vector conversion is not legal/custom for the target.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE serves as the "no opcode seen yet" sentinel.
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  // Integer vector type matching the scalar sources, one lane per operand.
  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Gather the pre-conversion integer operands, keeping undef lanes undef.
  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.getOpcode() == ISD::UNDEF)
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
  AddToWorklist(BV.getNode());

  // Apply the int->fp conversion once, to the whole new vector.
  return DAG.getNode(Opcode, dl, VT, BV);
}

/// Combine BUILD_VECTOR nodes: folds an all-undef vector to undef, tries the
/// ext- and convert-folding helpers above, then (continuing beyond this
/// excerpt) attempts to turn a BUILD_VECTOR of EXTRACT_VECTOR_ELTs into a
/// shuffle.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  SDValue V = reduceBuildVecExtToExtBuildVec(N);
  if (V.getNode())
    return V;

  V = reduceBuildVecConvertToConvertBuildVec(N);
  if (V.getNode())
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
10934 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) 10935 return SDValue(); 10936 10937 SDValue VecIn1, VecIn2; 10938 bool UsesZeroVector = false; 10939 for (unsigned i = 0; i != NumInScalars; ++i) { 10940 SDValue Op = N->getOperand(i); 10941 // Ignore undef inputs. 10942 if (Op.getOpcode() == ISD::UNDEF) continue; 10943 10944 // See if we can combine this build_vector into a blend with a zero vector. 10945 if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && 10946 cast<ConstantSDNode>(Op.getNode())->isNullValue()) || 10947 (Op.getOpcode() == ISD::ConstantFP && 10948 cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { 10949 UsesZeroVector = true; 10950 continue; 10951 } 10952 10953 // If this input is something other than a EXTRACT_VECTOR_ELT with a 10954 // constant index, bail out. 10955 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10956 !isa<ConstantSDNode>(Op.getOperand(1))) { 10957 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10958 break; 10959 } 10960 10961 // We allow up to two distinct input vectors. 10962 SDValue ExtractedFromVec = Op.getOperand(0); 10963 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 10964 continue; 10965 10966 if (!VecIn1.getNode()) { 10967 VecIn1 = ExtractedFromVec; 10968 } else if (!VecIn2.getNode() && !UsesZeroVector) { 10969 VecIn2 = ExtractedFromVec; 10970 } else { 10971 // Too many inputs. 10972 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10973 break; 10974 } 10975 } 10976 10977 // If everything is good, we can make a shuffle operation. 10978 if (VecIn1.getNode()) { 10979 unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); 10980 SmallVector<int, 8> Mask; 10981 for (unsigned i = 0; i != NumInScalars; ++i) { 10982 unsigned Opcode = N->getOperand(i).getOpcode(); 10983 if (Opcode == ISD::UNDEF) { 10984 Mask.push_back(-1); 10985 continue; 10986 } 10987 10988 // Operands can also be zero. 
10989 if (Opcode != ISD::EXTRACT_VECTOR_ELT) { 10990 assert(UsesZeroVector && 10991 (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && 10992 "Unexpected node found!"); 10993 Mask.push_back(NumInScalars+i); 10994 continue; 10995 } 10996 10997 // If extracting from the first vector, just use the index directly. 10998 SDValue Extract = N->getOperand(i); 10999 SDValue ExtVal = Extract.getOperand(1); 11000 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 11001 if (Extract.getOperand(0) == VecIn1) { 11002 Mask.push_back(ExtIndex); 11003 continue; 11004 } 11005 11006 // Otherwise, use InIdx + InputVecSize 11007 Mask.push_back(InNumElements + ExtIndex); 11008 } 11009 11010 // Avoid introducing illegal shuffles with zero. 11011 if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) 11012 return SDValue(); 11013 11014 // We can't generate a shuffle node with mismatched input and output types. 11015 // Attempt to transform a single input vector to the correct type. 11016 if ((VT != VecIn1.getValueType())) { 11017 // If the input vector type has a different base type to the output 11018 // vector type, bail out. 11019 EVT VTElemType = VT.getVectorElementType(); 11020 if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || 11021 (VecIn2.getNode() && 11022 (VecIn2.getValueType().getVectorElementType() != VTElemType))) 11023 return SDValue(); 11024 11025 // If the input vector is too small, widen it. 11026 // We only support widening of vectors which are half the size of the 11027 // output registers. For example XMM->YMM widening on X86 with AVX. 11028 EVT VecInT = VecIn1.getValueType(); 11029 if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { 11030 // If we only have one small input, widen it by adding undef values. 
11031 if (!VecIn2.getNode()) 11032 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, 11033 DAG.getUNDEF(VecIn1.getValueType())); 11034 else if (VecIn1.getValueType() == VecIn2.getValueType()) { 11035 // If we have two small inputs of the same type, try to concat them. 11036 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); 11037 VecIn2 = SDValue(nullptr, 0); 11038 } else 11039 return SDValue(); 11040 } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { 11041 // If the input vector is too large, try to split it. 11042 // We don't support having two input vectors that are too large.
|