Lines Matching refs:H2

419 my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
714 vmovd $h0#d,$H2
816 vmovd $h0#d,$H2
846 vmovd 4*2($ctx),$H2
981 vmovdqa $H2,0x20(%r11) # offload hash
983 vmovdqa 0x10(%rsp),$H2 # r1^2
990 vpmuludq $T3,$H2,$H1 # h3*r1
994 vpmuludq $T2,$H2,$H0 # h2*r1
995 vpmuludq $T1,$H2,$H1 # h1*r1
1000 vpmuludq $T0,$H2,$H2 # h0*r1
1002 vpaddq $H2,$D1,$D1 # d1 += h0*r1
1009 vmovdqa 0x50(%rsp),$H2 # r3^2
1018 vpmuludq $T1,$H2,$H1 # h1*r3
1019 vpmuludq $T0,$H2,$H2 # h0*r3
1021 vpaddq $H2,$D3,$D3 # d3 += h0*r3
1035 vpsrldq \$6,$H0,$H2 # splat input
1047 vpunpcklqdq $H3,$H2,$H3 # 2:3
1053 vpsrlq \$4,$H3,$H2
1057 vpand $MASK,$H2,$H2 # 2
1063 vpaddq 0x20(%r11),$H2,$H2
1086 vpmuludq $H2,$T4,$T0 # h2*r0
1095 vpmuludq $H2,$T2,$T1 # h2*r1
1106 vpmuludq $H2,$T3,$T0 # h2*r2
1128 vpmuludq $H2,$T3,$T3 # h2*s3
1133 vpmuludq $H2,$T4,$H2 # h2*s4
1136 vpaddq $H2,$D1,$D1 # d1 += h2*s4
1139 vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4
1180 vpaddq $D1,$H2,$H2 # h1 -> h2
1186 vpsrlq \$26,$H2,$D2
1187 vpand $MASK,$H2,$H2
1208 vpaddq $H2,$T2,$T2
1215 vmovdqa $H2,0x20(%r11)
1229 vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n
1234 vpmuludq $T3,$H2,$H0 # h3*r1
1237 vpmuludq $T2,$H2,$H1 # h2*r1
1240 vpmuludq $T1,$H2,$H0 # h1*r1
1242 vpmuludq $T0,$H2,$H2 # h0*r1
1243 vpaddq $H2,$D1,$D1 # d1 += h0*r1
1247 vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n
1255 vpmuludq $T4,$H2,$H1 # h4*s2
1258 vpmuludq $T3,$H2,$H2 # h3*s2
1259 vpaddq $H2,$D0,$D0 # d0 += h3*s2
1265 vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n
1274 vpmuludq $T0,$H2,$H2 # h0*r4
1275 vpaddq $H2,$D4,$D4 # h4 = d4 + h0*r4
1290 vpsrldq \$6,$H0,$H2 # splat input
1294 vpunpcklqdq $H3,$H2,$H3 # 2:3
1299 vpsrlq \$4,$H3,$H2
1302 vpand $MASK,$H2,$H2 # 2
1309 vpaddq 0x20(%r11),$H2,$H2
1320 vpmuludq $H2,$T4,$T0 # h2*r0
1331 vpmuludq $H2,$T2,$T1 # h2*r1
1342 vpmuludq $H2,$T4,$T1 # h2*r2
1365 vpmuludq $H2,$T4,$T4 # h2*s3
1374 vpmuludq $H2,$T3,$T1 # h2*s4
1416 vpsrlq \$26,$D2,$H2
1418 vpaddq $H2,$D3,$D3 # h2 -> h3
1516 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
1642 vmovd $h0#d,%x#$H2
1749 vmovd $h0#d,%x#$H2
1790 vmovd 4*2($ctx),%x#$H2
1889 vpaddq $H2,$T2,$H2 # accumulate input
1903 #vpaddq $H2,$T2,$H2 # accumulate input
1929 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
1930 vpmuludq $H2,$T1,$D3 # d3 = h2*r1
1931 vpmuludq $H2,$T2,$D4 # d4 = h2*r2
1932 vpmuludq $H2,$T3,$D0 # d0 = h2*s3
1933 vpmuludq $H2,$S4,$D1 # d1 = h2*s4
1936 vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp
1938 vpaddq $H2,$D2,$D2 # d2 += h1*r1
1940 vpmuludq `32*2`(%rsp),$H4,$H2 # h4*s1
1942 vpaddq $H2,$D0,$D0 # d0 += h4*s1
1946 vpmuludq $H1,$T0,$H2 # h1*r0
1948 vpaddq $H2,$D1,$D1 # d1 += h1*r0
1950 vpmuludq $H4,$T0,$H2 # h4*r0
1953 vpaddq $H2,$D4,$D4 # d4 += h4*r0
1957 vpmuludq $H4,$T1,$H2 # h4*s2
1960 vpaddq $H2,$D1,$D1 # d1 += h4*s2
1961 vmovdqa `32*5-0x90`(%rax),$H2 # r3
1969 vpmuludq $H1,$H2,$T4 # h1*r3
1970 vpmuludq $H0,$H2,$H2 # h0*r3
1973 vpaddq $H2,$D3,$D3 # d3 += h0*r3
1975 vpmuludq $H4,$T3,$H2 # h4*s3
1978 vpaddq $H2,$D2,$D2 # d2 += h4*s3
1984 vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4
2011 vpaddq $D1,$H2,$H2 # h1 -> h2
2020 vpsrlq \$26,$H2,$D2
2021 vpand $MASK,$H2,$H2
2024 vpaddq $T2,$H2,$H2 # modulo-scheduled
2053 #vpaddq $H2,$T2,$H2 # accumulate input
2064 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
2065 vpmuludq $H2,$T1,$D3 # d3 = h2*r1
2066 vpmuludq $H2,$T2,$D4 # d4 = h2*r2
2067 vpmuludq $H2,$T3,$D0 # d0 = h2*s3
2068 vpmuludq $H2,$S4,$D1 # d1 = h2*s4
2071 vpmuludq $H1,$T1,$H2 # h1*r1
2073 vpaddq $H2,$D2,$D2 # d2 += h1*r1
2075 vpmuludq `32*2+4`(%rsp),$H4,$H2 # h4*s1
2077 vpaddq $H2,$D0,$D0 # d0 += h4*s1
2080 vpmuludq $H1,$T0,$H2 # h1*r0
2083 vpaddq $H2,$D1,$D1 # d1 += h1*r0
2085 vpmuludq $H4,$T0,$H2 # h4*r0
2087 vpaddq $H2,$D4,$D4 # d4 += h4*r0
2090 vpmuludq $H4,$T1,$H2 # h4*s2
2092 vpaddq $H2,$D1,$D1 # d1 += h4*s2
2093 vmovdqu `32*5+4-0x90`(%rax),$H2 # r3
2099 vpmuludq $H1,$H2,$T4 # h1*r3
2100 vpmuludq $H0,$H2,$H2 # h0*r3
2102 vpaddq $H2,$D3,$D3 # d3 += h0*r3
2104 vpmuludq $H4,$T3,$H2 # h4*s3
2106 vpaddq $H2,$D2,$D2 # d2 += h4*s3
2110 vpaddq $H3,$D2,$H2 # h2 = d2 + h3*r4
2122 vpsrldq \$8,$H2,$T2
2127 vpaddq $T2,$H2,$H2
2136 vpermq \$0x2,$H2,$T2
2141 vpaddq $T2,$H2,$H2
2159 vpaddq $D1,$H2,$H2 # h1 -> h2
2165 vpsrlq \$26,$H2,$D2
2166 vpand $MASK,$H2,$H2
2179 vmovd %x#$H2,`4*2-48-64`($ctx)
2213 map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4));
2434 vpaddq $H2,$T2,$H2 # accumulate input
2452 #vpaddq $H2,$T2,$H2 # accumulate input
2469 vpmuludq $H2,$R1,$D3 # d3 = h2*r1
2471 vpmuludq $H2,$R2,$D4 # d4 = h2*r2
2473 vpmuludq $H2,$S3,$D0 # d0 = h2*s3
2475 vpmuludq $H2,$S4,$D1 # d1 = h2*s4
2477 vpmuludq $H2,$R0,$D2 # d2 = h2*r0
2531 vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3
2556 vpaddq $D1,$H2,$H2 # h1 -> h2
2562 vpaddq $T2,$H2,$H2 # modulo-scheduled
2565 vpsrlq \$26,$H2,$D2
2566 vpandq $MASK,$H2,$H2
2609 #vpaddq $H2,$T2,$H2 # accumulate input
2612 vpmuludq $H2,$R1,$D3 # d3 = h2*r1
2613 vpmuludq $H2,$R2,$D4 # d4 = h2*r2
2614 vpmuludq $H2,$S3,$D0 # d0 = h2*s3
2616 vpmuludq $H2,$S4,$D1 # d1 = h2*s4
2618 vpmuludq $H2,$R0,$D2 # d2 = h2*r0
2671 vpaddq $M2,$D2,$H2 # h2 = d3 + h4*s3
2681 vpermq \$0xb1,$H2,$D2
2686 vpaddq $D2,$H2,$H2
2693 vpermq \$0x2,$H2,$D2
2698 vpaddq $D2,$H2,$H2
2704 vextracti64x4 \$0x1,$H2,%y#$D2
2709 vpaddq $D2,$H2,${H2}{%k3}{z}
2712 map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK));
2737 vpaddq $D1,$H2,$H2 # h1 -> h2
2745 vpsrlq \$26,$H2,$D2
2746 vpand $MASK,$H2,$H2
2753 vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2
2767 vpsubq $T2,$H2,$H2 # undo input accumulation
2770 vmovd %x#$H2,`4*2-48-64`($ctx)
2899 my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17));
2965 vpermq \$0b10101010,$Dlo,${H2}{%k7}{z}
2976 vpmadd52luq $r0s2s1,$H2,$Dlo
2977 vpmadd52huq $r0s2s1,$H2,$Dhi
3022 my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
3049 vmovq 16($ctx),%x#$H2
3079 vpaddq $T2,$H2,$H2 # accumulate input
3094 vmovq 56($ctx),%x#$H2
3101 vmovdqa $H2,$R2
3107 vpmadd52luq $H2,$S1,$D0lo
3109 vpmadd52huq $H2,$S1,$D0hi
3111 vpmadd52luq $H2,$S2,$D1lo
3113 vpmadd52huq $H2,$S2,$D1hi
3115 vpmadd52luq $H2,$R0,$D2lo
3117 vpmadd52huq $H2,$R0,$D2hi
3151 vpandq $mask42,$D2lo,$H2
3169 vpunpcklqdq $R2,$H2,$R2
3170 vpbroadcastq %x#$H2,%x#$H2
3187 vinserti128 \$1,%x#$R2,$H2,$R2
3200 vmovq 16($ctx),%x#$H2
3249 vpaddq $T2,$H2,$H2 # accumulate input
3261 #vpaddq $T2,$H2,$H2 # accumulate input
3266 vpmadd52luq $H2,$S1,$D0lo
3268 vpmadd52huq $H2,$S1,$D0hi
3270 vpmadd52luq $H2,$S2,$D1lo
3272 vpmadd52huq $H2,$S2,$D1hi
3274 vpmadd52luq $H2,$R0,$D2lo
3276 vpmadd52huq $H2,$R0,$D2hi
3320 vpandq $mask42,$D2lo,$H2
3323 vpaddq $T2,$H2,$H2 # accumulate input
3350 #vpaddq $T2,$H2,$H2 # accumulate input
3355 vpmadd52luq $H2,$S1,$D0lo
3357 vpmadd52huq $H2,$S1,$D0hi
3359 vpmadd52luq $H2,$S2,$D1lo
3361 vpmadd52huq $H2,$S2,$D1hi
3363 vpmadd52luq $H2,$R0,$D2lo
3365 vpmadd52huq $H2,$R0,$D2hi
3393 vpsrldq \$8,$D2hi,$H2
3399 vpaddq $H2,$D2hi,$D2hi
3406 vpermq \$0x2,$D2hi,$H2
3410 vpaddq $H2,$D2hi,${D2hi}{%k1}{z}
3430 vpandq $mask42,$D2lo,$H2
3449 vmovq %x#$H2,16($ctx)
3463 my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
3486 vmovq 16($ctx),%x#$H2
3575 map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
3613 vpaddq $T2,$H2,$H2 # accumulate input
3626 #vpaddq $T2,$H2,$H2 # accumulate input
3631 vpmadd52luq $H2,$S1,$D0lo
3633 vpmadd52huq $H2,$S1,$D0hi
3635 vpmadd52luq $H2,$S2,$D1lo
3637 vpmadd52huq $H2,$S2,$D1hi
3639 vpmadd52luq $H2,$R0,$D2lo
3641 vpmadd52huq $H2,$R0,$D2hi
3685 vpandq $mask42,$D2lo,$H2
3688 vpaddq $T2,$H2,$H2 # accumulate input
3705 #vpaddq $T2,$H2,$H2 # accumulate input
3710 vpmadd52luq $H2,$SS1,$D0lo
3712 vpmadd52huq $H2,$SS1,$D0hi
3714 vpmadd52luq $H2,$SS2,$D1lo
3716 vpmadd52huq $H2,$SS2,$D1hi
3718 vpmadd52luq $H2,$RR0,$D2lo
3720 vpmadd52huq $H2,$RR0,$D2hi
3748 vpsrldq \$8,$D2hi,$H2
3754 vpaddq $H2,$D2hi,$D2hi
3761 vpermq \$0x2,$D2hi,$H2
3767 vpaddq $H2,$D2hi,$D2hi
3772 vextracti64x4 \$1,$D2hi,%y#$H2
3775 map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
3785 vpaddq $H2,$D2hi,${D2hi}{%k1}{z}
3805 vpandq $mask42,$D2lo,$H2
3822 vmovq %x#$H2,16($ctx)