• Home
  • History
  • Annotate
  • Raw
  • Download
  • only in /freebsd-13-stable/contrib/llvm-project/llvm/lib/Target/X86/

Lines Matching refs:ISD

188   int ISD = TLI->InstructionOpcodeToISD(Opcode);
189 assert(ISD && "Invalid opcode");
192 { ISD::FDIV, MVT::f32, 18 }, // divss
193 { ISD::FDIV, MVT::v4f32, 35 }, // divps
194 { ISD::FDIV, MVT::f64, 33 }, // divsd
195 { ISD::FDIV, MVT::v2f64, 65 }, // divpd
199 if (const auto *Entry = CostTableLookup(GLMCostTable, ISD,
204 { ISD::MUL, MVT::v4i32, 11 }, // pmulld
205 { ISD::MUL, MVT::v8i16, 2 }, // pmullw
206 { ISD::MUL, MVT::v16i8, 14 }, // extend/pmullw/trunc sequence.
207 { ISD::FMUL, MVT::f64, 2 }, // mulsd
208 { ISD::FMUL, MVT::v2f64, 4 }, // mulpd
209 { ISD::FMUL, MVT::v4f32, 2 }, // mulps
210 { ISD::FDIV, MVT::f32, 17 }, // divss
211 { ISD::FDIV, MVT::v4f32, 39 }, // divps
212 { ISD::FDIV, MVT::f64, 32 }, // divsd
213 { ISD::FDIV, MVT::v2f64, 69 }, // divpd
214 { ISD::FADD, MVT::v2f64, 2 }, // addpd
215 { ISD::FSUB, MVT::v2f64, 2 }, // subpd
221 { ISD::MUL, MVT::v2i64, 17 },
223 { ISD::ADD, MVT::v2i64, 4 },
224 { ISD::SUB, MVT::v2i64, 4 },
228 if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) {
248 if (const auto *Entry = CostTableLookup(SLMCostTable, ISD,
254 if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
255 ISD == ISD::UREM) &&
259 if (ISD == ISD::SDIV || ISD == ISD::SREM) {
278 if (ISD == ISD::SREM) {
290 if (ISD == ISD::UDIV)
304 { ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
305 { ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
306 { ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb.
311 if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD,
317 { ISD::SRA, MVT::v2i64, 1 },
318 { ISD::SRA, MVT::v4i64, 1 },
319 { ISD::SRA, MVT::v8i64, 1 },
321 { ISD::SHL, MVT::v64i8, 4 }, // psllw + pand.
322 { ISD::SRL, MVT::v64i8, 4 }, // psrlw + pand.
323 { ISD::SRA, MVT::v64i8, 8 }, // psrlw, pand, pxor, psubb.
328 if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD,
334 { ISD::SHL, MVT::v32i8, 2 }, // psllw + pand.
335 { ISD::SRL, MVT::v32i8, 2 }, // psrlw + pand.
336 { ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
338 { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
343 if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD,
349 { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand.
350 { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand.
351 { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
353 { ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split.
354 { ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split.
355 { ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
362 CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
367 { ISD::SDIV, MVT::v64i8, 14 }, // 2*ext+2*pmulhw sequence
368 { ISD::SREM, MVT::v64i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
369 { ISD::UDIV, MVT::v64i8, 14 }, // 2*ext+2*pmulhw sequence
370 { ISD::UREM, MVT::v64i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
371 { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
372 { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
373 { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
374 { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
381 CostTableLookup(AVX512BWConstCostTable, ISD, LT.second))
386 { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
387 { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
388 { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
389 { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
390 { ISD::SDIV, MVT::v64i8, 28 }, // 4*ext+4*pmulhw sequence
391 { ISD::SREM, MVT::v64i8, 32 }, // 4*ext+4*pmulhw+mul+sub sequence
392 { ISD::UDIV, MVT::v64i8, 28 }, // 4*ext+4*pmulhw sequence
393 { ISD::UREM, MVT::v64i8, 32 }, // 4*ext+4*pmulhw+mul+sub sequence
394 { ISD::SDIV, MVT::v32i16, 12 }, // 2*vpmulhw sequence
395 { ISD::SREM, MVT::v32i16, 16 }, // 2*vpmulhw+mul+sub sequence
396 { ISD::UDIV, MVT::v32i16, 12 }, // 2*vpmulhuw sequence
397 { ISD::UREM, MVT::v32i16, 16 }, // 2*vpmulhuw+mul+sub sequence
404 CostTableLookup(AVX512ConstCostTable, ISD, LT.second))
409 { ISD::SDIV, MVT::v32i8, 14 }, // 2*ext+2*pmulhw sequence
410 { ISD::SREM, MVT::v32i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
411 { ISD::UDIV, MVT::v32i8, 14 }, // 2*ext+2*pmulhw sequence
412 { ISD::UREM, MVT::v32i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
413 { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
414 { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
415 { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
416 { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
417 { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
418 { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
419 { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
420 { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
426 if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
431 { ISD::SDIV, MVT::v32i8, 28+2 }, // 4*ext+4*pmulhw sequence + split.
432 { ISD::SREM, MVT::v32i8, 32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
433 { ISD::SDIV, MVT::v16i8, 14 }, // 2*ext+2*pmulhw sequence
434 { ISD::SREM, MVT::v16i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
435 { ISD::UDIV, MVT::v32i8, 28+2 }, // 4*ext+4*pmulhw sequence + split.
436 { ISD::UREM, MVT::v32i8, 32+2 }, // 4*ext+4*pmulhw+mul+sub sequence + split.
437 { ISD::UDIV, MVT::v16i8, 14 }, // 2*ext+2*pmulhw sequence
438 { ISD::UREM, MVT::v16i8, 16 }, // 2*ext+2*pmulhw+mul+sub sequence
439 { ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split.
440 { ISD::SREM, MVT::v16i16, 16+2 }, // 2*pmulhw+mul+sub sequence + split.
441 { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
442 { ISD::SREM, MVT::v8i16, 8 }, // pmulhw+mul+sub sequence
443 { ISD::UDIV, MVT::v16i16, 12+2 }, // 2*pmulhuw sequence + split.
444 { ISD::UREM, MVT::v16i16, 16+2 }, // 2*pmulhuw+mul+sub sequence + split.
445 { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
446 { ISD::UREM, MVT::v8i16, 8 }, // pmulhuw+mul+sub sequence
447 { ISD::SDIV, MVT::v8i32, 38+2 }, // 2*pmuludq sequence + split.
448 { ISD::SREM, MVT::v8i32, 48+2 }, // 2*pmuludq+mul+sub sequence + split.
449 { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
450 { ISD::SREM, MVT::v4i32, 24 }, // pmuludq+mul+sub sequence
451 { ISD::UDIV, MVT::v8i32, 30+2 }, // 2*pmuludq sequence + split.
452 { ISD::UREM, MVT::v8i32, 40+2 }, // 2*pmuludq+mul+sub sequence + split.
453 { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
454 { ISD::UREM, MVT::v4i32, 20 }, // pmuludq+mul+sub sequence
461 if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
463 if (ISD == ISD::SREM && LT.second == MVT::v8i32 && ST->hasAVX())
465 if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
467 if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41())
470 if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
475 { ISD::SHL, MVT::v8i16, 1 }, // vpsllvw
476 { ISD::SRL, MVT::v8i16, 1 }, // vpsrlvw
477 { ISD::SRA, MVT::v8i16, 1 }, // vpsravw
479 { ISD::SHL, MVT::v16i16, 1 }, // vpsllvw
480 { ISD::SRL, MVT::v16i16, 1 }, // vpsrlvw
481 { ISD::SRA, MVT::v16i16, 1 }, // vpsravw
483 { ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
484 { ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
485 { ISD::SRA, MVT::v32i16, 1 }, // vpsravw
489 if (const auto *Entry = CostTableLookup(AVX512BWShiftCostTable, ISD, LT.second))
494 { ISD::SHL, MVT::v16i16, 1 }, // psllw.
495 { ISD::SRL, MVT::v16i16, 1 }, // psrlw.
496 { ISD::SRA, MVT::v16i16, 1 }, // psraw.
497 { ISD::SHL, MVT::v32i16, 2 }, // 2*psllw.
498 { ISD::SRL, MVT::v32i16, 2 }, // 2*psrlw.
499 { ISD::SRA, MVT::v32i16, 2 }, // 2*psraw.
506 CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
512 { ISD::SHL, MVT::v8i16, 1 }, // psllw.
513 { ISD::SHL, MVT::v4i32, 1 }, // pslld
514 { ISD::SHL, MVT::v2i64, 1 }, // psllq.
516 { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
517 { ISD::SRL, MVT::v4i32, 1 }, // psrld.
518 { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
520 { ISD::SRA, MVT::v8i16, 1 }, // psraw.
521 { ISD::SRA, MVT::v4i32, 1 }, // psrad.
528 CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
533 { ISD::MUL, MVT::v2i64, 1 },
534 { ISD::MUL, MVT::v4i64, 1 },
535 { ISD::MUL, MVT::v8i64, 1 }
540 if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second))
544 { ISD::SHL, MVT::v64i8, 11 }, // vpblendvb sequence.
545 { ISD::SRL, MVT::v64i8, 11 }, // vpblendvb sequence.
546 { ISD::SRA, MVT::v64i8, 24 }, // vpblendvb sequence.
548 { ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
549 { ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence.
550 { ISD::MUL, MVT::v16i8, 4 }, // extend/pmullw/trunc sequence.
555 if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second))
559 { ISD::SHL, MVT::v16i32, 1 },
560 { ISD::SRL, MVT::v16i32, 1 },
561 { ISD::SRA, MVT::v16i32, 1 },
563 { ISD::SHL, MVT::v8i64, 1 },
564 { ISD::SRL, MVT::v8i64, 1 },
566 { ISD::SRA, MVT::v2i64, 1 },
567 { ISD::SRA, MVT::v4i64, 1 },
568 { ISD::SRA, MVT::v8i64, 1 },
570 { ISD::MUL, MVT::v64i8, 26 }, // extend/pmullw/trunc sequence.
571 { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
572 { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
573 { ISD::MUL, MVT::v16i32, 1 }, // pmulld (Skylake from agner.org)
574 { ISD::MUL, MVT::v8i32, 1 }, // pmulld (Skylake from agner.org)
575 { ISD::MUL, MVT::v4i32, 1 }, // pmulld (Skylake from agner.org)
576 { ISD::MUL, MVT::v8i64, 8 }, // 3*pmuludq/3*shift/2*add
578 { ISD::FADD, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/
579 { ISD::FSUB, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/
580 { ISD::FMUL, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/
582 { ISD::FADD, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/
583 { ISD::FSUB, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/
584 { ISD::FMUL, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/
588 if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
594 { ISD::SHL, MVT::v4i32, 1 },
595 { ISD::SRL, MVT::v4i32, 1 },
596 { ISD::SRA, MVT::v4i32, 1 },
597 { ISD::SHL, MVT::v8i32, 1 },
598 { ISD::SRL, MVT::v8i32, 1 },
599 { ISD::SRA, MVT::v8i32, 1 },
600 { ISD::SHL, MVT::v2i64, 1 },
601 { ISD::SRL, MVT::v2i64, 1 },
602 { ISD::SHL, MVT::v4i64, 1 },
603 { ISD::SRL, MVT::v4i64, 1 },
607 if (ISD == ISD::SHL && LT.second == MVT::v32i16 &&
620 if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
630 if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
636 { ISD::SHL, MVT::v16i8, 1 },
637 { ISD::SRL, MVT::v16i8, 2 },
638 { ISD::SRA, MVT::v16i8, 2 },
639 { ISD::SHL, MVT::v8i16, 1 },
640 { ISD::SRL, MVT::v8i16, 2 },
641 { ISD::SRA, MVT::v8i16, 2 },
642 { ISD::SHL, MVT::v4i32, 1 },
643 { ISD::SRL, MVT::v4i32, 2 },
644 { ISD::SRA, MVT::v4i32, 2 },
645 { ISD::SHL, MVT::v2i64, 1 },
646 { ISD::SRL, MVT::v2i64, 2 },
647 { ISD::SRA, MVT::v2i64, 2 },
649 { ISD::SHL, MVT::v32i8, 2+2 },
650 { ISD::SRL, MVT::v32i8, 4+2 },
651 { ISD::SRA, MVT::v32i8, 4+2 },
652 { ISD::SHL, MVT::v16i16, 2+2 },
653 { ISD::SRL, MVT::v16i16, 4+2 },
654 { ISD::SRA, MVT::v16i16, 4+2 },
655 { ISD::SHL, MVT::v8i32, 2+2 },
656 { ISD::SRL, MVT::v8i32, 4+2 },
657 { ISD::SRA, MVT::v8i32, 4+2 },
658 { ISD::SHL, MVT::v4i64, 2+2 },
659 { ISD::SRL, MVT::v4i64, 4+2 },
660 { ISD::SRA, MVT::v4i64, 4+2 },
667 int ShiftISD = ISD;
668 if ((ShiftISD == ISD::SRL || ShiftISD == ISD::SRA) &&
671 ShiftISD = ISD::SHL;
679 { ISD::SHL, MVT::v16i16, 2+2 }, // 2*psllw + split.
680 { ISD::SHL, MVT::v8i32, 2+2 }, // 2*pslld + split.
681 { ISD::SHL, MVT::v4i64, 2+2 }, // 2*psllq + split.
683 { ISD::SRL, MVT::v16i16, 2+2 }, // 2*psrlw + split.
684 { ISD::SRL, MVT::v8i32, 2+2 }, // 2*psrld + split.
685 { ISD::SRL, MVT::v4i64, 2+2 }, // 2*psrlq + split.
687 { ISD::SRA, MVT::v16i16, 2+2 }, // 2*psraw + split.
688 { ISD::SRA, MVT::v8i32, 2+2 }, // 2*psrad + split.
689 { ISD::SRA, MVT::v2i64, 4 }, // 2*psrad + shuffle.
690 { ISD::SRA, MVT::v4i64, 8+2 }, // 2*(2*psrad + shuffle) + split.
697 // Handle AVX2 uniform v4i64 ISD::SRA, it's not worth a table.
698 if (ISD == ISD::SRA && LT.second == MVT::v4i64 && ST->hasAVX2())
702 CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second))
706 if (ISD == ISD::SHL &&
713 ISD = ISD::MUL;
717 { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
718 { ISD::SHL, MVT::v64i8, 22 }, // 2*vpblendvb sequence.
719 { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
720 { ISD::SHL, MVT::v32i16, 20 }, // 2*extend/vpsrlvd/pack sequence.
722 { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
723 { ISD::SRL, MVT::v64i8, 22 }, // 2*vpblendvb sequence.
724 { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
725 { ISD::SRL, MVT::v32i16, 20 }, // 2*extend/vpsrlvd/pack sequence.
727 { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
728 { ISD::SRA, MVT::v64i8, 48 }, // 2*vpblendvb sequence.
729 { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
730 { ISD::SRA, MVT::v32i16, 20 }, // 2*extend/vpsravd/pack sequence.
731 { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
732 { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
734 { ISD::SUB, MVT::v32i8, 1 }, // psubb
735 { ISD::ADD, MVT::v32i8, 1 }, // paddb
736 { ISD::SUB, MVT::v16i16, 1 }, // psubw
737 { ISD::ADD, MVT::v16i16, 1 }, // paddw
738 { ISD::SUB, MVT::v8i32, 1 }, // psubd
739 { ISD::ADD, MVT::v8i32, 1 }, // paddd
740 { ISD::SUB, MVT::v4i64, 1 }, // psubq
741 { ISD::ADD, MVT::v4i64, 1 }, // paddq
743 { ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
744 { ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
745 { ISD::MUL, MVT::v16i16, 1 }, // pmullw
746 { ISD::MUL, MVT::v8i32, 2 }, // pmulld (Haswell from agner.org)
747 { ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
749 { ISD::FADD, MVT::v4f64, 1 }, // Haswell from http://www.agner.org/
750 { ISD::FADD, MVT::v8f32, 1 }, // Haswell from http://www.agner.org/
751 { ISD::FSUB, MVT::v4f64, 1 }, // Haswell from http://www.agner.org/
752 { ISD::FSUB, MVT::v8f32, 1 }, // Haswell from http://www.agner.org/
753 { ISD::FMUL, MVT::v4f64, 1 }, // Haswell from http://www.agner.org/
754 { ISD::FMUL, MVT::v8f32, 1 }, // Haswell from http://www.agner.org/
756 { ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
757 { ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
758 { ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
759 { ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/
760 { ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
761 { ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
766 if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
773 { ISD::MUL, MVT::v16i16, 4 },
774 { ISD::MUL, MVT::v8i32, 4 },
775 { ISD::SUB, MVT::v32i8, 4 },
776 { ISD::ADD, MVT::v32i8, 4 },
777 { ISD::SUB, MVT::v16i16, 4 },
778 { ISD::ADD, MVT::v16i16, 4 },
779 { ISD::SUB, MVT::v8i32, 4 },
780 { ISD::ADD, MVT::v8i32, 4 },
781 { ISD::SUB, MVT::v4i64, 4 },
782 { ISD::ADD, MVT::v4i64, 4 },
789 { ISD::MUL, MVT::v4i64, 18 },
791 { ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
793 { ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/
794 { ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
795 { ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
796 { ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/
797 { ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/
798 { ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/
802 if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
806 { ISD::FADD, MVT::f64, 1 }, // Nehalem from http://www.agner.org/
807 { ISD::FADD, MVT::f32, 1 }, // Nehalem from http://www.agner.org/
808 { ISD::FADD, MVT::v2f64, 1 }, // Nehalem from http://www.agner.org/
809 { ISD::FADD, MVT::v4f32, 1 }, // Nehalem from http://www.agner.org/
811 { ISD::FSUB, MVT::f64, 1 }, // Nehalem from http://www.agner.org/
812 { ISD::FSUB, MVT::f32 , 1 }, // Nehalem from http://www.agner.org/
813 { ISD::FSUB, MVT::v2f64, 1 }, // Nehalem from http://www.agner.org/
814 { ISD::FSUB, MVT::v4f32, 1 }, // Nehalem from http://www.agner.org/
816 { ISD::FMUL, MVT::f64, 1 }, // Nehalem from http://www.agner.org/
817 { ISD::FMUL, MVT::f32, 1 }, // Nehalem from http://www.agner.org/
818 { ISD::FMUL, MVT::v2f64, 1 }, // Nehalem from http://www.agner.org/
819 { ISD::FMUL, MVT::v4f32, 1 }, // Nehalem from http://www.agner.org/
821 { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
822 { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
823 { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/
824 { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/
828 if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second))
832 { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
833 { ISD::SHL, MVT::v32i8, 2*11+2 }, // pblendvb sequence + split.
834 { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
835 { ISD::SHL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
836 { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld
837 { ISD::SHL, MVT::v8i32, 2*4+2 }, // pslld/paddd/cvttps2dq/pmulld + split
839 { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
840 { ISD::SRL, MVT::v32i8, 2*12+2 }, // pblendvb sequence + split.
841 { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence.
842 { ISD::SRL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
843 { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend.
844 { ISD::SRL, MVT::v8i32, 2*11+2 }, // Shift each lane + blend + split.
846 { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence.
847 { ISD::SRA, MVT::v32i8, 2*24+2 }, // pblendvb sequence + split.
848 { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence.
849 { ISD::SRA, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split.
850 { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
851 { ISD::SRA, MVT::v8i32, 2*12+2 }, // Shift each lane + blend + split.
853 { ISD::MUL, MVT::v4i32, 2 } // pmulld (Nehalem from agner.org)
857 if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
863 { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
864 { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
865 { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
866 { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
867 { ISD::SHL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split.
869 { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
870 { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
871 { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
872 { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
873 { ISD::SRL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split.
875 { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
876 { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
877 { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
878 { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
879 { ISD::SRA, MVT::v4i64, 2*12+2 }, // srl/xor/sub sequence+split.
881 { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
882 { ISD::MUL, MVT::v8i16, 1 }, // pmullw
883 { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
884 { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
886 { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/
887 { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
888 { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
889 { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
891 { ISD::FADD, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
892 { ISD::FADD, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
894 { ISD::FSUB, MVT::f32, 2 }, // Pentium IV from http://www.agner.org/
895 { ISD::FSUB, MVT::f64, 2 }, // Pentium IV from http://www.agner.org/
899 if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
903 { ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
904 { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
906 { ISD::FADD, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
907 { ISD::FADD, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
909 { ISD::FSUB, MVT::f32, 1 }, // Pentium III from http://www.agner.org/
910 { ISD::FSUB, MVT::v4f32, 2 }, // Pentium III from http://www.agner.org/
912 { ISD::ADD, MVT::i8, 1 }, // Pentium III from http://www.agner.org/
913 { ISD::ADD, MVT::i16, 1 }, // Pentium III from http://www.agner.org/
914 { ISD::ADD, MVT::i32, 1 }, // Pentium III from http://www.agner.org/
916 { ISD::SUB, MVT::i8, 1 }, // Pentium III from http://www.agner.org/
917 { ISD::SUB, MVT::i16, 1 }, // Pentium III from http://www.agner.org/
918 { ISD::SUB, MVT::i32, 1 }, // Pentium III from http://www.agner.org/
922 if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second))
931 if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM ||
932 ISD == ISD::UDIV || ISD == ISD::UREM)) {
1372 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1373 assert(ISD && "Invalid opcode");
1386 { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
1387 { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
1390 { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
1391 { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
1392 { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
1393 { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
1394 { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
1395 { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
1396 { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
1397 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
1398 { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
1399 { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
1400 { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
1403 { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
1404 { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
1405 { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
1406 { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
1407 { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
1408 { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
1409 { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
1410 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
1411 { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
1412 { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
1413 { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
1415 { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 },
1416 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // widen to zmm
1417 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // widen to zmm
1418 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // widen to zmm
1419 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // widen to zmm
1420 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm
1421 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm
1422 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm
1423 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm
1424 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm
1425 { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm
1426 { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
1427 { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
1431 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
1432 { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
1434 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
1435 { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
1437 { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f32, 1 },
1438 { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f64, 1 },
1440 { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 },
1441 { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
1448 { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
1449 { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
1450 { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
1452 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd
1453 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd
1454 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd
1455 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 3 }, // sext+vpslld+vptestmd
1456 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq
1457 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq
1458 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq
1459 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 3 }, // sext+vpslld+vptestmd
1460 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, // zmm vpslld+vptestmd
1461 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // zmm vpslld+vptestmd
1462 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // zmm vpslld+vptestmd
1463 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, // vpslld+vptestmd
1464 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // zmm vpsllq+vptestmq
1465 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // zmm vpsllq+vptestmq
1466 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 }, // vpsllq+vptestmq
1467 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 },
1468 { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 },
1469 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 },
1470 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 2 },
1471 { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
1472 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, // zmm vpmovqd
1473 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 5 },// 2*vpmovqd+concat+vpmovdb
1475 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, // extend to v16i32
1476 { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 8 },
1480 { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 3 },
1481 { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 4 },
1482 { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 3 },
1483 { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 4 },
1484 { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 3 },
1485 { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 4 },
1486 { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 3 },
1487 { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 4 },
1491 { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 3 },
1492 { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 4 },
1493 { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 3 },
1494 { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 4 },
1495 { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 3 },
1496 { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 4 },
1497 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 3 },
1498 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 },
1500 { ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, 1 }, // zmm vpternlogd
1501 { ISD::ZERO_EXTEND, MVT::v2i32, MVT::v2i1, 2 }, // zmm vpternlogd+psrld
1502 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, // zmm vpternlogd
1503 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, // zmm vpternlogd+psrld
1504 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, // zmm vpternlogd
1505 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, // zmm vpternlogd+psrld
1506 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, // zmm vpternlogq
1507 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, // zmm vpternlogq+psrlq
1508 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, // zmm vpternlogq
1509 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, // zmm vpternlogq+psrlq
1511 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 1 }, // vpternlogd
1512 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, // vpternlogd+psrld
1513 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i1, 1 }, // vpternlogq
1514 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i1, 2 }, // vpternlogq+psrlq
1516 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
1517 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
1518 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
1519 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
1520 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
1521 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
1522 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
1523 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
1524 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
1525 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
1527 { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right
1528 { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right
1530 { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
1531 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
1532 { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
1533 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
1534 { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
1535 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
1536 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
1537 { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
1539 { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
1540 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
1541 { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
1542 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
1543 { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
1544 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
1545 { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
1546 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
1547 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
1548 { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
1550 { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 },
1551 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 },
1552 { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 },
1553 { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 },
1555 { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
1556 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 },
1557 { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 },
1558 { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
1559 { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 },
1560 { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 },
1565 { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
1566 { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
1567 { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
1568 { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
1569 { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
1570 { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
1571 { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
1572 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
1573 { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
1576 { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
1577 { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
1578 { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
1579 { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
1580 { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
1581 { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
1582 { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
1583 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
1584 { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
1586 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
1587 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // vpsllw+vptestmb
1588 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // vpsllw+vptestmw
1589 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // vpsllw+vptestmb
1590 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // vpsllw+vptestmw
1591 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // vpsllw+vptestmb
1592 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // vpsllw+vptestmw
1593 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // vpsllw+vptestmb
1594 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // vpsllw+vptestmw
1595 { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // vpsllw+vptestmb
1599 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
1600 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
1601 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
1602 { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
1604 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
1605 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
1606 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
1607 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
1609 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 },
1610 { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 },
1611 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
1612 { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 },
1614 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
1615 { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
1616 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
1617 { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
1621 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd
1622 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd
1623 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd
1624 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 8 }, // split+2*v8i8
1625 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq
1626 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq
1627 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq
1628 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 8 }, // split+2*v8i16
1629 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, // vpslld+vptestmd
1630 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // vpslld+vptestmd
1631 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // vpslld+vptestmd
1632 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // vpsllq+vptestmq
1633 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // vpsllq+vptestmq
1634 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, // vpmovqd
1638 { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 5 },
1639 { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 6 },
1640 { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 5 },
1641 { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 6 },
1642 { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 5 },
1643 { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 6 },
1644 { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 10 },
1645 { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 12 },
1649 { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 4 },
1650 { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 5 },
1651 { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 4 },
1652 { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 5 },
1653 { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 4 },
1654 { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 5 },
1655 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 10 },
1656 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 12 },
1658 { ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, 1 }, // vpternlogd
1659 { ISD::ZERO_EXTEND, MVT::v2i32, MVT::v2i1, 2 }, // vpternlogd+psrld
1660 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, // vpternlogd
1661 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, // vpternlogd+psrld
1662 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, // vpternlogd
1663 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, // vpternlogd+psrld
1664 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, // vpternlogq
1665 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, // vpternlogq+psrlq
1666 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, // vpternlogq
1667 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, // vpternlogq+psrlq
1669 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 },
1670 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
1671 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 },
1672 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 },
1673 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
1674 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
1675 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
1676 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
1677 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
1678 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
1679 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
1680 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 },
1681 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
1682 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
1684 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 },
1685 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
1687 { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 },
1688 { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 },
1690 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
1691 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
1693 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
1694 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
1695 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
1696 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
1697 { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
1701 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
1702 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
1703 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
1704 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
1705 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
1706 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
1707 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
1708 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
1709 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
1710 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
1711 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
1712 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
1713 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
1714 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
1715 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
1716 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
1717 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
1718 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
1719 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 3 },
1720 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 3 },
1722 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
1723 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
1725 { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 },
1726 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 },
1727 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
1728 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
1730 { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
1731 { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
1733 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
1737 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
1738 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
1739 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
1740 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
1741 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
1742 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
1743 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
1744 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
1745 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 4 },
1746 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 },
1747 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
1748 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
1749 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
1750 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
1751 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
1752 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
1753 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
1754 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
1756 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 },
1757 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 },
1758 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 },
1759 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 },
1760 { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 },
1762 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
1763 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
1764 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
1765 { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
1766 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 4 },
1767 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
1768 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 },
1769 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 },
1770 { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
1771 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 },
1773 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
1774 { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
1775 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
1776 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
1777 { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
1778 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
1779 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
1780 { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
1781 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
1782 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
1783 { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
1784 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
1786 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
1787 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
1788 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
1789 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
1790 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
1791 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
1792 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
1793 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
1794 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
1795 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 6 },
1796 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
1797 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
1798 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
1799 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
1800 { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 },
1806 { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
1807 { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
1809 { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
1810 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
1811 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
1812 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
1814 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
1815 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
1816 { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
1817 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
1823 { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 8*4 },
1824 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
1826 { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 },
1827 { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 },
1831 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 2 },
1832 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 2 },
1833 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
1834 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
1835 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
1836 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
1838 { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
1839 { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 },
1840 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
1841 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
1842 { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
1843 { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
1844 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
1845 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
1846 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
1847 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
1848 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
1849 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
1850 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
1851 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
1852 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
1853 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
1854 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
1855 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
1858 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 1 }, // PMOVXZBQ
1859 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 1 }, // PMOVXZWQ
1860 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 1 }, // PMOVXZBD
1862 { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 1 },
1863 { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 1 },
1864 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
1865 { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
1866 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
1867 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
1868 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
1869 { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
1870 { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB
1872 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 },
1873 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
1875 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
1876 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },
1878 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
1879 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
1880 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
1887 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
1888 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
1889 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
1890 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
1891 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
1892 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 2*10 },
1893 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2*10 },
1894 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
1895 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
1897 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
1898 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
1899 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
1900 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
1901 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
1902 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
1903 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
1904 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
1906 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
1907 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
1908 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
1909 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
1910 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
1911 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
1913 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
1915 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 6 },
1916 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 6 },
1918 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
1919 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 },
1920 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
1921 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
1922 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
1923 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 },
1924 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
1925 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
1927 { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
1928 { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
1929 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
1930 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },
1931 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
1932 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
1933 { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
1934 { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },
1935 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
1936 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
1937 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 },
1938 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
1939 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 },
1940 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
1941 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
1942 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 },
1943 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
1944 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 },
1945 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
1946 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
1947 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
1948 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
1949 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
1950 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
1953 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 1 }, // PSHUFD
1954 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // PUNPCKLWD+DQ
1955 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // PUNPCKLBW+WD+PSHUFD
1956 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 1 }, // PUNPCKLWD
1957 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // PUNPCKLBW+WD
1958 { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 1 }, // PUNPCKLBW
1960 { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB
1961 { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // PAND+PACKUSWB
1962 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // PAND+PACKUSWB
1963 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
1964 { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+2*PACKUSWB
1965 { ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 },
1966 { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
1967 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
1968 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
1969 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 },
1970 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
1971 { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 },
1972 { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 4 }, // PAND+3*PACKUSWB
1973 { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 2 }, // PSHUFD+PSHUFLW
1974 { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD
1981 if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
1998 if (const auto *Entry = ConvertCostTableLookup(AVX512BWConversionTbl, ISD,
2003 if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
2008 if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
2014 if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD,
2019 if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD,
2024 if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD,
2029 if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
2035 if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
2041 if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
2047 if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
2067 int ISD = TLI->InstructionOpcodeToISD(Opcode);
2068 assert(ISD && "Invalid opcode");
2113 { ISD::SETCC, MVT::v2i64, 2 },
2117 { ISD::SETCC, MVT::v32i16, 1 },
2118 { ISD::SETCC, MVT::v64i8, 1 },
2120 { ISD::SELECT, MVT::v32i16, 1 },
2121 { ISD::SELECT, MVT::v64i8, 1 },
2125 { ISD::SETCC, MVT::v8i64, 1 },
2126 { ISD::SETCC, MVT::v16i32, 1 },
2127 { ISD::SETCC, MVT::v8f64, 1 },
2128 { ISD::SETCC, MVT::v16f32, 1 },
2130 { ISD::SELECT, MVT::v8i64, 1 },
2131 { ISD::SELECT, MVT::v16i32, 1 },
2132 { ISD::SELECT, MVT::v8f64, 1 },
2133 { ISD::SELECT, MVT::v16f32, 1 },
2135 { ISD::SETCC, MVT::v32i16, 2 }, // FIXME: should probably be 4
2136 { ISD::SETCC, MVT::v64i8, 2 }, // FIXME: should probably be 4
2138 { ISD::SELECT, MVT::v32i16, 2 }, // FIXME: should be 3
2139 { ISD::SELECT, MVT::v64i8, 2 }, // FIXME: should be 3
2143 { ISD::SETCC, MVT::v4i64, 1 },
2144 { ISD::SETCC, MVT::v8i32, 1 },
2145 { ISD::SETCC, MVT::v16i16, 1 },
2146 { ISD::SETCC, MVT::v32i8, 1 },
2148 { ISD::SELECT, MVT::v4i64, 1 }, // pblendvb
2149 { ISD::SELECT, MVT::v8i32, 1 }, // pblendvb
2150 { ISD::SELECT, MVT::v16i16, 1 }, // pblendvb
2151 { ISD::SELECT, MVT::v32i8, 1 }, // pblendvb
2155 { ISD::SETCC, MVT::v4f64, 1 },
2156 { ISD::SETCC, MVT::v8f32, 1 },
2158 { ISD::SETCC, MVT::v4i64, 4 },
2159 { ISD::SETCC, MVT::v8i32, 4 },
2160 { ISD::SETCC, MVT::v16i16, 4 },
2161 { ISD::SETCC, MVT::v32i8, 4 },
2163 { ISD::SELECT, MVT::v4f64, 1 }, // vblendvpd
2164 { ISD::SELECT, MVT::v8f32, 1 }, // vblendvps
2165 { ISD::SELECT, MVT::v4i64, 1 }, // vblendvpd
2166 { ISD::SELECT, MVT::v8i32, 1 }, // vblendvps
2167 { ISD::SELECT, MVT::v16i16, 3 }, // vandps + vandnps + vorps
2168 { ISD::SELECT, MVT::v32i8, 3 }, // vandps + vandnps + vorps
2172 { ISD::SETCC, MVT::v2f64, 1 },
2173 { ISD::SETCC, MVT::v4f32, 1 },
2174 { ISD::SETCC, MVT::v2i64, 1 },
2178 { ISD::SELECT, MVT::v2f64, 1 }, // blendvpd
2179 { ISD::SELECT, MVT::v4f32, 1 }, // blendvps
2180 { ISD::SELECT, MVT::v2i64, 1 }, // pblendvb
2181 { ISD::SELECT, MVT::v4i32, 1 }, // pblendvb
2182 { ISD::SELECT, MVT::v8i16, 1 }, // pblendvb
2183 { ISD::SELECT, MVT::v16i8, 1 }, // pblendvb
2187 { ISD::SETCC, MVT::v2f64, 2 },
2188 { ISD::SETCC, MVT::f64, 1 },
2189 { ISD::SETCC, MVT::v2i64, 8 },
2190 { ISD::SETCC, MVT::v4i32, 1 },
2191 { ISD::SETCC, MVT::v8i16, 1 },
2192 { ISD::SETCC, MVT::v16i8, 1 },
2194 { ISD::SELECT, MVT::v2f64, 3 }, // andpd + andnpd + orpd
2195 { ISD::SELECT, MVT::v2i64, 3 }, // pand + pandn + por
2196 { ISD::SELECT, MVT::v4i32, 3 }, // pand + pandn + por
2197 { ISD::SELECT, MVT::v8i16, 3 }, // pand + pandn + por
2198 { ISD::SELECT, MVT::v16i8, 3 }, // pand + pandn + por
2202 { ISD::SETCC, MVT::v4f32, 2 },
2203 { ISD::SETCC, MVT::f32, 1 },
2205 { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps
2209 if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
2213 if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
2217 if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
2221 if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
2225 if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
2229 if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
2233 if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
2237 if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
2241 if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
2259 { ISD::CTLZ, MVT::v8i64, 1 },
2260 { ISD::CTLZ, MVT::v16i32, 1 },
2261 { ISD::CTLZ, MVT::v32i16, 8 },
2262 { ISD::CTLZ, MVT::v64i8, 20 },
2263 { ISD::CTLZ, MVT::v4i64, 1 },
2264 { ISD::CTLZ, MVT::v8i32, 1 },
2265 { ISD::CTLZ, MVT::v16i16, 4 },
2266 { ISD::CTLZ, MVT::v32i8, 10 },
2267 { ISD::CTLZ, MVT::v2i64, 1 },
2268 { ISD::CTLZ, MVT::v4i32, 1 },
2269 { ISD::CTLZ, MVT::v8i16, 4 },
2270 { ISD::CTLZ, MVT::v16i8, 4 },
2273 { ISD::BITREVERSE, MVT::v8i64, 5 },
2274 { ISD::BITREVERSE, MVT::v16i32, 5 },
2275 { ISD::BITREVERSE, MVT::v32i16, 5 },
2276 { ISD::BITREVERSE, MVT::v64i8, 5 },
2277 { ISD::CTLZ, MVT::v8i64, 23 },
2278 { ISD::CTLZ, MVT::v16i32, 22 },
2279 { ISD::CTLZ, MVT::v32i16, 18 },
2280 { ISD::CTLZ, MVT::v64i8, 17 },
2281 { ISD::CTPOP, MVT::v8i64, 7 },
2282 { ISD::CTPOP, MVT::v16i32, 11 },
2283 { ISD::CTPOP, MVT::v32i16, 9 },
2284 { ISD::CTPOP, MVT::v64i8, 6 },
2285 { ISD::CTTZ, MVT::v8i64, 10 },
2286 { ISD::CTTZ, MVT::v16i32, 14 },
2287 { ISD::CTTZ, MVT::v32i16, 12 },
2288 { ISD::CTTZ, MVT::v64i8, 9 },
2289 { ISD::SADDSAT, MVT::v32i16, 1 },
2290 { ISD::SADDSAT, MVT::v64i8, 1 },
2291 { ISD::SSUBSAT, MVT::v32i16, 1 },
2292 { ISD::SSUBSAT, MVT::v64i8, 1 },
2293 { ISD::UADDSAT, MVT::v32i16, 1 },
2294 { ISD::UADDSAT, MVT::v64i8, 1 },
2295 { ISD::USUBSAT, MVT::v32i16, 1 },
2296 { ISD::USUBSAT, MVT::v64i8, 1 },
2299 { ISD::BITREVERSE, MVT::v8i64, 36 },
2300 { ISD::BITREVERSE, MVT::v16i32, 24 },
2301 { ISD::BITREVERSE, MVT::v32i16, 10 },
2302 { ISD::BITREVERSE, MVT::v64i8, 10 },
2303 { ISD::CTLZ, MVT::v8i64, 29 },
2304 { ISD::CTLZ, MVT::v16i32, 35 },
2305 { ISD::CTLZ, MVT::v32i16, 28 },
2306 { ISD::CTLZ, MVT::v64i8, 18 },
2307 { ISD::CTPOP, MVT::v8i64, 16 },
2308 { ISD::CTPOP, MVT::v16i32, 24 },
2309 { ISD::CTPOP, MVT::v32i16, 18 },
2310 { ISD::CTPOP, MVT::v64i8, 12 },
2311 { ISD::CTTZ, MVT::v8i64, 20 },
2312 { ISD::CTTZ, MVT::v16i32, 28 },
2313 { ISD::CTTZ, MVT::v32i16, 24 },
2314 { ISD::CTTZ, MVT::v64i8, 18 },
2315 { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
2316 { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
2317 { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
2318 { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq
2319 { ISD::UADDSAT, MVT::v16i32, 3 }, // not + pminud + paddd
2320 { ISD::UADDSAT, MVT::v2i64, 3 }, // not + pminuq + paddq
2321 { ISD::UADDSAT, MVT::v4i64, 3 }, // not + pminuq + paddq
2322 { ISD::UADDSAT, MVT::v8i64, 3 }, // not + pminuq + paddq
2323 { ISD::SADDSAT, MVT::v32i16, 2 }, // FIXME: include split
2324 { ISD::SADDSAT, MVT::v64i8, 2 }, // FIXME: include split
2325 { ISD::SSUBSAT, MVT::v32i16, 2 }, // FIXME: include split
2326 { ISD::SSUBSAT, MVT::v64i8, 2 }, // FIXME: include split
2327 { ISD::UADDSAT, MVT::v32i16, 2 }, // FIXME: include split
2328 { ISD::UADDSAT, MVT::v64i8, 2 }, // FIXME: include split
2329 { ISD::USUBSAT, MVT::v32i16, 2 }, // FIXME: include split
2330 { ISD::USUBSAT, MVT::v64i8, 2 }, // FIXME: include split
2331 { ISD::FMAXNUM, MVT::f32, 2 },
2332 { ISD::FMAXNUM, MVT::v4f32, 2 },
2333 { ISD::FMAXNUM, MVT::v8f32, 2 },
2334 { ISD::FMAXNUM, MVT::v16f32, 2 },
2335 { ISD::FMAXNUM, MVT::f64, 2 },
2336 { ISD::FMAXNUM, MVT::v2f64, 2 },
2337 { ISD::FMAXNUM, MVT::v4f64, 2 },
2338 { ISD::FMAXNUM, MVT::v8f64, 2 },
2341 { ISD::BITREVERSE, MVT::v4i64, 4 },
2342 { ISD::BITREVERSE, MVT::v8i32, 4 },
2343 { ISD::BITREVERSE, MVT::v16i16, 4 },
2344 { ISD::BITREVERSE, MVT::v32i8, 4 },
2345 { ISD::BITREVERSE, MVT::v2i64, 1 },
2346 { ISD::BITREVERSE, MVT::v4i32, 1 },
2347 { ISD::BITREVERSE, MVT::v8i16, 1 },
2348 { ISD::BITREVERSE, MVT::v16i8, 1 },
2349 { ISD::BITREVERSE, MVT::i64, 3 },
2350 { ISD::BITREVERSE, MVT::i32, 3 },
2351 { ISD::BITREVERSE, MVT::i16, 3 },
2352 { ISD::BITREVERSE, MVT::i8, 3 }
2355 { ISD::BITREVERSE, MVT::v4i64, 5 },
2356 { ISD::BITREVERSE, MVT::v8i32, 5 },
2357 { ISD::BITREVERSE, MVT::v16i16, 5 },
2358 { ISD::BITREVERSE, MVT::v32i8, 5 },
2359 { ISD::BSWAP, MVT::v4i64, 1 },
2360 { ISD::BSWAP, MVT::v8i32, 1 },
2361 { ISD::BSWAP, MVT::v16i16, 1 },
2362 { ISD::CTLZ, MVT::v4i64, 23 },
2363 { ISD::CTLZ, MVT::v8i32, 18 },
2364 { ISD::CTLZ, MVT::v16i16, 14 },
2365 { ISD::CTLZ, MVT::v32i8, 9 },
2366 { ISD::CTPOP, MVT::v4i64, 7 },
2367 { ISD::CTPOP, MVT::v8i32, 11 },
2368 { ISD::CTPOP, MVT::v16i16, 9 },
2369 { ISD::CTPOP, MVT::v32i8, 6 },
2370 { ISD::CTTZ, MVT::v4i64, 10 },
2371 { ISD::CTTZ, MVT::v8i32, 14 },
2372 { ISD::CTTZ, MVT::v16i16, 12 },
2373 { ISD::CTTZ, MVT::v32i8, 9 },
2374 { ISD::SADDSAT, MVT::v16i16, 1 },
2375 { ISD::SADDSAT, MVT::v32i8, 1 },
2376 { ISD::SSUBSAT, MVT::v16i16, 1 },
2377 { ISD::SSUBSAT, MVT::v32i8, 1 },
2378 { ISD::UADDSAT, MVT::v16i16, 1 },
2379 { ISD::UADDSAT, MVT::v32i8, 1 },
2380 { ISD::UADDSAT, MVT::v8i32, 3 }, // not + pminud + paddd
2381 { ISD::USUBSAT, MVT::v16i16, 1 },
2382 { ISD::USUBSAT, MVT::v32i8, 1 },
2383 { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
2384 { ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
2385 { ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
2386 { ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
2387 { ISD::FSQRT, MVT::f64, 14 }, // Haswell from http://www.agner.org/
2388 { ISD::FSQRT, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
2389 { ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
2392 { ISD::BITREVERSE, MVT::v4i64, 12 }, // 2 x 128-bit Op + extract/insert
2393 { ISD::BITREVERSE, MVT::v8i32, 12 }, // 2 x 128-bit Op + extract/insert
2394 { ISD::BITREVERSE, MVT::v16i16, 12 }, // 2 x 128-bit Op + extract/insert
2395 { ISD::BITREVERSE, MVT::v32i8, 12 }, // 2 x 128-bit Op + extract/insert
2396 { ISD::BSWAP, MVT::v4i64, 4 },
2397 { ISD::BSWAP, MVT::v8i32, 4 },
2398 { ISD::BSWAP, MVT::v16i16, 4 },
2399 { ISD::CTLZ, MVT::v4i64, 48 }, // 2 x 128-bit Op + extract/insert
2400 { ISD::CTLZ, MVT::v8i32, 38 }, // 2 x 128-bit Op + extract/insert
2401 { ISD::CTLZ, MVT::v16i16, 30 }, // 2 x 128-bit Op + extract/insert
2402 { ISD::CTLZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
2403 { ISD::CTPOP, MVT::v4i64, 16 }, // 2 x 128-bit Op + extract/insert
2404 { ISD::CTPOP, MVT::v8i32, 24 }, // 2 x 128-bit Op + extract/insert
2405 { ISD::CTPOP, MVT::v16i16, 20 }, // 2 x 128-bit Op + extract/insert
2406 { ISD::CTPOP, MVT::v32i8, 14 }, // 2 x 128-bit Op + extract/insert
2407 { ISD::CTTZ, MVT::v4i64, 22 }, // 2 x 128-bit Op + extract/insert
2408 { ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert
2409 { ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert
2410 { ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
2411 { ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
2412 { ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
2413 { ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
2414 { ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
2415 { ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
2416 { ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
2417 { ISD::UADDSAT, MVT::v8i32, 8 }, // 2 x 128-bit Op + extract/insert
2418 { ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
2419 { ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
2420 { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
2421 { ISD::FMAXNUM, MVT::f32, 3 },
2422 { ISD::FMAXNUM, MVT::v4f32, 3 },
2423 { ISD::FMAXNUM, MVT::v8f32, 5 },
2424 { ISD::FMAXNUM, MVT::f64, 3 },
2425 { ISD::FMAXNUM, MVT::v2f64, 3 },
2426 { ISD::FMAXNUM, MVT::v4f64, 5 },
2427 { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
2428 { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
2429 { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
2430 { ISD::FSQRT, MVT::f64, 21 }, // SNB from http://www.agner.org/
2431 { ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/
2432 { ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/
2435 { ISD::FSQRT, MVT::f32, 19 }, // sqrtss
2436 { ISD::FSQRT, MVT::v4f32, 37 }, // sqrtps
2437 { ISD::FSQRT, MVT::f64, 34 }, // sqrtsd
2438 { ISD::FSQRT, MVT::v2f64, 67 }, // sqrtpd
2441 { ISD::FSQRT, MVT::f32, 20 }, // sqrtss
2442 { ISD::FSQRT, MVT::v4f32, 40 }, // sqrtps
2443 { ISD::FSQRT, MVT::f64, 35 }, // sqrtsd
2444 { ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
2447 { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd
2448 { ISD::UADDSAT, MVT::v4i32, 3 }, // not + pminud + paddd
2449 { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
2450 { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
2453 { ISD::BITREVERSE, MVT::v2i64, 5 },
2454 { ISD::BITREVERSE, MVT::v4i32, 5 },
2455 { ISD::BITREVERSE, MVT::v8i16, 5 },
2456 { ISD::BITREVERSE, MVT::v16i8, 5 },
2457 { ISD::BSWAP, MVT::v2i64, 1 },
2458 { ISD::BSWAP, MVT::v4i32, 1 },
2459 { ISD::BSWAP, MVT::v8i16, 1 },
2460 { ISD::CTLZ, MVT::v2i64, 23 },
2461 { ISD::CTLZ, MVT::v4i32, 18 },
2462 { ISD::CTLZ, MVT::v8i16, 14 },
2463 { ISD::CTLZ, MVT::v16i8, 9 },
2464 { ISD::CTPOP, MVT::v2i64, 7 },
2465 { ISD::CTPOP, MVT::v4i32, 11 },
2466 { ISD::CTPOP, MVT::v8i16, 9 },
2467 { ISD::CTPOP, MVT::v16i8, 6 },
2468 { ISD::CTTZ, MVT::v2i64, 10 },
2469 { ISD::CTTZ, MVT::v4i32, 14 },
2470 { ISD::CTTZ, MVT::v8i16, 12 },
2471 { ISD::CTTZ, MVT::v16i8, 9 }
2474 { ISD::BITREVERSE, MVT::v2i64, 29 },
2475 { ISD::BITREVERSE, MVT::v4i32, 27 },
2476 { ISD::BITREVERSE, MVT::v8i16, 27 },
2477 { ISD::BITREVERSE, MVT::v16i8, 20 },
2478 { ISD::BSWAP, MVT::v2i64, 7 },
2479 { ISD::BSWAP, MVT::v4i32, 7 },
2480 { ISD::BSWAP, MVT::v8i16, 7 },
2481 { ISD::CTLZ, MVT::v2i64, 25 },
2482 { ISD::CTLZ, MVT::v4i32, 26 },
2483 { ISD::CTLZ, MVT::v8i16, 20 },
2484 { ISD::CTLZ, MVT::v16i8, 17 },
2485 { ISD::CTPOP, MVT::v2i64, 12 },
2486 { ISD::CTPOP, MVT::v4i32, 15 },
2487 { ISD::CTPOP, MVT::v8i16, 13 },
2488 { ISD::CTPOP, MVT::v16i8, 10 },
2489 { ISD::CTTZ, MVT::v2i64, 14 },
2490 { ISD::CTTZ, MVT::v4i32, 18 },
2491 { ISD::CTTZ, MVT::v8i16, 16 },
2492 { ISD::CTTZ, MVT::v16i8, 13 },
2493 { ISD::SADDSAT, MVT::v8i16, 1 },
2494 { ISD::SADDSAT, MVT::v16i8, 1 },
2495 { ISD::SSUBSAT, MVT::v8i16, 1 },
2496 { ISD::SSUBSAT, MVT::v16i8, 1 },
2497 { ISD::UADDSAT, MVT::v8i16, 1 },
2498 { ISD::UADDSAT, MVT::v16i8, 1 },
2499 { ISD::USUBSAT, MVT::v8i16, 1 },
2500 { ISD::USUBSAT, MVT::v16i8, 1 },
2501 { ISD::FMAXNUM, MVT::f64, 4 },
2502 { ISD::FMAXNUM, MVT::v2f64, 4 },
2503 { ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/
2504 { ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
2507 { ISD::FMAXNUM, MVT::f32, 4 },
2508 { ISD::FMAXNUM, MVT::v4f32, 4 },
2509 { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
2510 { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
2513 { ISD::CTTZ, MVT::i64, 1 },
2516 { ISD::CTTZ, MVT::i32, 1 },
2517 { ISD::CTTZ, MVT::i16, 1 },
2518 { ISD::CTTZ, MVT::i8, 1 },
2521 { ISD::CTLZ, MVT::i64, 1 },
2524 { ISD::CTLZ, MVT::i32, 1 },
2525 { ISD::CTLZ, MVT::i16, 1 },
2526 { ISD::CTLZ, MVT::i8, 1 },
2529 { ISD::CTPOP, MVT::i64, 1 },
2532 { ISD::CTPOP, MVT::i32, 1 },
2533 { ISD::CTPOP, MVT::i16, 1 },
2534 { ISD::CTPOP, MVT::i8, 1 },
2537 { ISD::BITREVERSE, MVT::i64, 14 },
2538 { ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV
2539 { ISD::CTTZ, MVT::i64, 3 }, // TEST+BSF+CMOV/BRANCH
2540 { ISD::CTPOP, MVT::i64, 10 },
2541 { ISD::SADDO, MVT::i64, 1 },
2542 { ISD::UADDO, MVT::i64, 1 },
2545 { ISD::BITREVERSE, MVT::i32, 14 },
2546 { ISD::BITREVERSE, MVT::i16, 14 },
2547 { ISD::BITREVERSE, MVT::i8, 11 },
2548 { ISD::CTLZ, MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV
2549 { ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV
2550 { ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV
2551 { ISD::CTTZ, MVT::i32, 3 }, // TEST+BSF+CMOV/BRANCH
2552 { ISD::CTTZ, MVT::i16, 3 }, // TEST+BSF+CMOV/BRANCH
2553 { ISD::CTTZ, MVT::i8, 3 }, // TEST+BSF+CMOV/BRANCH
2554 { ISD::CTPOP, MVT::i32, 8 },
2555 { ISD::CTPOP, MVT::i16, 9 },
2556 { ISD::CTPOP, MVT::i8, 7 },
2557 { ISD::SADDO, MVT::i32, 1 },
2558 { ISD::SADDO, MVT::i16, 1 },
2559 { ISD::SADDO, MVT::i8, 1 },
2560 { ISD::UADDO, MVT::i32, 1 },
2561 { ISD::UADDO, MVT::i16, 1 },
2562 { ISD::UADDO, MVT::i8, 1 },
2568 unsigned ISD = ISD::DELETED_NODE;
2573 ISD = ISD::BITREVERSE;
2576 ISD = ISD::BSWAP;
2579 ISD = ISD::CTLZ;
2582 ISD = ISD::CTPOP;
2585 ISD = ISD::CTTZ;
2590 ISD = ISD::FMAXNUM;
2593 ISD = ISD::SADDSAT;
2596 ISD = ISD::SSUBSAT;
2599 ISD = ISD::UADDSAT;
2602 ISD = ISD::USUBSAT;
2605 ISD = ISD::FSQRT;
2610 ISD = ISD::SADDO;
2616 ISD = ISD::UADDO;
2621 if (ISD != ISD::DELETED_NODE) {
2628 if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
2632 if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
2636 if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
2640 if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
2644 if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
2648 if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
2652 if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
2656 if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
2660 if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
2664 if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
2668 if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
2672 if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
2677 if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy))
2680 if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy))
2686 if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy))
2689 if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy))
2695 if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy))
2698 if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy))
2705 if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
2708 if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
2724 { ISD::ROTL, MVT::v8i64, 1 },
2725 { ISD::ROTL, MVT::v4i64, 1 },
2726 { ISD::ROTL, MVT::v2i64, 1 },
2727 { ISD::ROTL, MVT::v16i32, 1 },
2728 { ISD::ROTL, MVT::v8i32, 1 },
2729 { ISD::ROTL, MVT::v4i32, 1 },
2730 { ISD::ROTR, MVT::v8i64, 1 },
2731 { ISD::ROTR, MVT::v4i64, 1 },
2732 { ISD::ROTR, MVT::v2i64, 1 },
2733 { ISD::ROTR, MVT::v16i32, 1 },
2734 { ISD::ROTR, MVT::v8i32, 1 },
2735 { ISD::ROTR, MVT::v4i32, 1 }
2739 { ISD::ROTL, MVT::v4i64, 4 },
2740 { ISD::ROTL, MVT::v8i32, 4 },
2741 { ISD::ROTL, MVT::v16i16, 4 },
2742 { ISD::ROTL, MVT::v32i8, 4 },
2743 { ISD::ROTL, MVT::v2i64, 1 },
2744 { ISD::ROTL, MVT::v4i32, 1 },
2745 { ISD::ROTL, MVT::v8i16, 1 },
2746 { ISD::ROTL, MVT::v16i8, 1 },
2747 { ISD::ROTR, MVT::v4i64, 6 },
2748 { ISD::ROTR, MVT::v8i32, 6 },
2749 { ISD::ROTR, MVT::v16i16, 6 },
2750 { ISD::ROTR, MVT::v32i8, 6 },
2751 { ISD::ROTR, MVT::v2i64, 2 },
2752 { ISD::ROTR, MVT::v4i32, 2 },
2753 { ISD::ROTR, MVT::v8i16, 2 },
2754 { ISD::ROTR, MVT::v16i8, 2 }
2757 { ISD::ROTL, MVT::i64, 1 },
2758 { ISD::ROTR, MVT::i64, 1 },
2759 { ISD::FSHL, MVT::i64, 4 }
2762 { ISD::ROTL, MVT::i32, 1 },
2763 { ISD::ROTL, MVT::i16, 1 },
2764 { ISD::ROTL, MVT::i8, 1 },
2765 { ISD::ROTR, MVT::i32, 1 },
2766 { ISD::ROTR, MVT::i16, 1 },
2767 { ISD::ROTR, MVT::i8, 1 },
2768 { ISD::FSHL, MVT::i32, 4 },
2769 { ISD::FSHL, MVT::i16, 4 },
2770 { ISD::FSHL, MVT::i8, 4 }
2776 unsigned ISD = ISD::DELETED_NODE;
2781 ISD = ISD::FSHL;
2783 ISD = ISD::ROTL;
2787 ISD = ISD::FSHL;
2789 ISD = ISD::ROTR;
2793 if (ISD != ISD::DELETED_NODE) {
2800 if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
2804 if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
2808 if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
2811 if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
2820 { ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 },
2821 { ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 },
2822 { ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 },
2823 { ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 }
2868 int ISD = TLI->InstructionOpcodeToISD(Opcode);
2869 assert(ISD && "Unexpected vector opcode");
2872 if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
2916 // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much
2917 // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT.
3131 { ISD::FADD, MVT::v2f64, 3 },
3132 { ISD::ADD, MVT::v2i64, 5 },
3136 { ISD::FADD, MVT::v2f64, 2 },
3137 { ISD::FADD, MVT::v4f32, 4 },
3138 { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
3139 { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32
3140 { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
3141 { ISD::ADD, MVT::v2i16, 2 }, // The data reported by the IACA tool is "4.3".
3142 { ISD::ADD, MVT::v4i16, 3 }, // The data reported by the IACA tool is "4.3".
3143 { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
3144 { ISD::ADD, MVT::v2i8, 2 },
3145 { ISD::ADD, MVT::v4i8, 2 },
3146 { ISD::ADD, MVT::v8i8, 2 },
3147 { ISD::ADD, MVT::v16i8, 3 },
3151 { ISD::FADD, MVT::v4f64, 3 },
3152 { ISD::FADD, MVT::v4f32, 3 },
3153 { ISD::FADD, MVT::v8f32, 4 },
3154 { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
3155 { ISD::ADD, MVT::v4i64, 3 },
3156 { ISD::ADD, MVT::v8i32, 5 },
3157 { ISD::ADD, MVT::v16i16, 5 },
3158 { ISD::ADD, MVT::v32i8, 4 },
3161 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3162 assert(ISD && "Invalid opcode");
3171 if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
3175 if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
3179 if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
3200 if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
3204 if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
3208 if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
3214 { ISD::AND, MVT::v2i1, 3 },
3215 { ISD::AND, MVT::v4i1, 5 },
3216 { ISD::AND, MVT::v8i1, 7 },
3217 { ISD::AND, MVT::v16i1, 9 },
3218 { ISD::AND, MVT::v32i1, 11 },
3219 { ISD::AND, MVT::v64i1, 13 },
3220 { ISD::OR, MVT::v2i1, 3 },
3221 { ISD::OR, MVT::v4i1, 5 },
3222 { ISD::OR, MVT::v8i1, 7 },
3223 { ISD::OR, MVT::v16i1, 9 },
3224 { ISD::OR, MVT::v32i1, 11 },
3225 { ISD::OR, MVT::v64i1, 13 },
3229 { ISD::AND, MVT::v16i16, 2 }, // vpmovmskb + cmp
3230 { ISD::AND, MVT::v32i8, 2 }, // vpmovmskb + cmp
3231 { ISD::OR, MVT::v16i16, 2 }, // vpmovmskb + cmp
3232 { ISD::OR, MVT::v32i8, 2 }, // vpmovmskb + cmp
3236 { ISD::AND, MVT::v4i64, 2 }, // vmovmskpd + cmp
3237 { ISD::AND, MVT::v8i32, 2 }, // vmovmskps + cmp
3238 { ISD::AND, MVT::v16i16, 4 }, // vextractf128 + vpand + vpmovmskb + cmp
3239 { ISD::AND, MVT::v32i8, 4 }, // vextractf128 + vpand + vpmovmskb + cmp
3240 { ISD::OR, MVT::v4i64, 2 }, // vmovmskpd + cmp
3241 { ISD::OR, MVT::v8i32, 2 }, // vmovmskps + cmp
3242 { ISD::OR, MVT::v16i16, 4 }, // vextractf128 + vpor + vpmovmskb + cmp
3243 { ISD::OR, MVT::v32i8, 4 }, // vextractf128 + vpor + vpmovmskb + cmp
3247 { ISD::AND, MVT::v2i64, 2 }, // movmskpd + cmp
3248 { ISD::AND, MVT::v4i32, 2 }, // movmskps + cmp
3249 { ISD::AND, MVT::v8i16, 2 }, // pmovmskb + cmp
3250 { ISD::AND, MVT::v16i8, 2 }, // pmovmskb + cmp
3251 { ISD::OR, MVT::v2i64, 2 }, // movmskpd + cmp
3252 { ISD::OR, MVT::v4i32, 2 }, // movmskps + cmp
3253 { ISD::OR, MVT::v8i16, 2 }, // pmovmskb + cmp
3254 { ISD::OR, MVT::v16i8, 2 }, // pmovmskb + cmp
3270 if (const auto *Entry = CostTableLookup(AVX512BoolReduction, ISD, MTy))
3273 if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy))
3276 if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy))
3279 if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
3366 int ISD;
3368 ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
3372 ISD = ISD::FMINNUM;
3376 {ISD::FMINNUM, MVT::v4f32, 1},
3380 {ISD::FMINNUM, MVT::v2f64, 1},
3381 {ISD::SMIN, MVT::v8i16, 1},
3382 {ISD::UMIN, MVT::v16i8, 1},
3386 {ISD::SMIN, MVT::v4i32, 1},
3387 {ISD::UMIN, MVT::v4i32, 1},
3388 {ISD::UMIN, MVT::v8i16, 1},
3389 {ISD::SMIN, MVT::v16i8, 1},
3393 {ISD::UMIN, MVT::v2i64, 3}, // xor+pcmpgtq+blendvpd
3397 {ISD::FMINNUM, MVT::v8f32, 1},
3398 {ISD::FMINNUM, MVT::v4f64, 1},
3399 {ISD::SMIN, MVT::v8i32, 3},
3400 {ISD::UMIN, MVT::v8i32, 3},
3401 {ISD::SMIN, MVT::v16i16, 3},
3402 {ISD::UMIN, MVT::v16i16, 3},
3403 {ISD::SMIN, MVT::v32i8, 3},
3404 {ISD::UMIN, MVT::v32i8, 3},
3408 {ISD::SMIN, MVT::v8i32, 1},
3409 {ISD::UMIN, MVT::v8i32, 1},
3410 {ISD::SMIN, MVT::v16i16, 1},
3411 {ISD::UMIN, MVT::v16i16, 1},
3412 {ISD::SMIN, MVT::v32i8, 1},
3413 {ISD::UMIN, MVT::v32i8, 1},
3417 {ISD::FMINNUM, MVT::v16f32, 1},
3418 {ISD::FMINNUM, MVT::v8f64, 1},
3419 {ISD::SMIN, MVT::v2i64, 1},
3420 {ISD::UMIN, MVT::v2i64, 1},
3421 {ISD::SMIN, MVT::v4i64, 1},
3422 {ISD::UMIN, MVT::v4i64, 1},
3423 {ISD::SMIN, MVT::v8i64, 1},
3424 {ISD::UMIN, MVT::v8i64, 1},
3425 {ISD::SMIN, MVT::v16i32, 1},
3426 {ISD::UMIN, MVT::v16i32, 1},
3430 {ISD::SMIN, MVT::v32i16, 1},
3431 {ISD::UMIN, MVT::v32i16, 1},
3432 {ISD::SMIN, MVT::v64i8, 1},
3433 {ISD::UMIN, MVT::v64i8, 1},
3438 if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
3442 if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
3446 if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
3450 if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
3454 if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
3458 if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
3462 if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
3466 if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
3496 int ISD;
3498 ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
3502 ISD = ISD::FMINNUM;
3509 {ISD::UMIN, MVT::v2i16, 5}, // need pxors to use pminsw/pmaxsw
3510 {ISD::UMIN, MVT::v4i16, 7}, // need pxors to use pminsw/pmaxsw
3511 {ISD::UMIN, MVT::v8i16, 9}, // need pxors to use pminsw/pmaxsw
3515 {ISD::SMIN, MVT::v2i16, 3}, // same as sse2
3516 {ISD::SMIN, MVT::v4i16, 5}, // same as sse2
3517 {ISD::UMIN, MVT::v2i16, 5}, // same as sse2
3518 {ISD::UMIN, MVT::v4i16, 7}, // same as sse2
3519 {ISD::SMIN, MVT::v8i16, 4}, // phminposuw+xor
3520 {ISD::UMIN, MVT::v8i16, 4}, // FIXME: umin is cheaper than umax
3521 {ISD::SMIN, MVT::v2i8, 3}, // pminsb
3522 {ISD::SMIN, MVT::v4i8, 5}, // pminsb
3523 {ISD::SMIN, MVT::v8i8, 7}, // pminsb
3524 {ISD::SMIN, MVT::v16i8, 6},
3525 {ISD::UMIN, MVT::v2i8, 3}, // same as sse2
3526 {ISD::UMIN, MVT::v4i8, 5}, // same as sse2
3527 {ISD::UMIN, MVT::v8i8, 7}, // same as sse2
3528 {ISD::UMIN, MVT::v16i8, 6}, // FIXME: umin is cheaper than umax
3532 {ISD::SMIN, MVT::v16i16, 6},
3533 {ISD::UMIN, MVT::v16i16, 6}, // FIXME: umin is cheaper than umax
3534 {ISD::SMIN, MVT::v32i8, 8},
3535 {ISD::UMIN, MVT::v32i8, 8},
3539 {ISD::SMIN, MVT::v32i16, 8},
3540 {ISD::UMIN, MVT::v32i16, 8}, // FIXME: umin is cheaper than umax
3541 {ISD::SMIN, MVT::v64i8, 10},
3542 {ISD::UMIN, MVT::v64i8, 10},
3552 if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy))
3556 if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
3560 if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
3564 if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
3586 if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy))
3590 if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
3594 if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
3598 if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
4158 return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT);