Lines Matching refs:W28

73 	1. W0 = rotate_left(left_shift(concatenate(W8,W4),64) ^ W16 ^ W28 ^ W32, 2); 
76 1. In total, we need 8 16-byte registers or memory for W0,W4,...,W28. W0 and W32 can be the same register or memory.
77 2. The registers are used in a circular buffering mode. For example, we start with W28,W24,...,W0 (with W0 indicating the most recent 16-byte)
78 i=0, W28,W24,...,W0
79 i=4, W24,W20,...,W28
112 #define stack_size (12+16*2+16*11+16*4) // 12-bytes (alignment) + extra 2 + 3 (W24/W28/XMM_SHUFB_BSWAP) + 8 (xmm0-xmm7) + 4 (WK(t))
121 // symbolizing registers or stack memory with algorithmic variables W0,W4,...,W28 + W_TMP, W_TMP2, and XMM_SHUFB_BSWAP for code with ssse3 support
133 #define W28 %xmm9
137 #define W28 13*16(sp)
213 .macro W_PRECALC_00_15_1_ssse3 // input argument $0 : current 16-bytes in the circular buffer, one of W0,W4,W8,...,W28
267 .macro W_PRECALC_00_15_1_nossse3 // input argument $0 : current 16-bytes in the circular buffer, one of W0,W4,W8,...,W28
348 W = rotate_left(left_shift(concatenate(W8,W4),64) ^ W16 ^ W28 ^ W32, 2);
353 0. W_tmp = W6; W = W28 ^ W32;
354 1. W = W_tmp = W6 ^ W16 ^ W28 ^ W32;
355 2. W_tmp = (W6 ^ W16 ^ W28 ^ W32) rol 2;
361 .macro W_PRECALC_32_79_0_ssse3 // input arguments : W28,W8,W4,W
363 pxor $0, $3 // W = W28 ^ W32;
368 .macro W_PRECALC_32_79_0_nossse3 // input arguments : W28,W8,W4,W
371 pxor $0, $3 // W = W28 ^ W32
377 // this is a variant of W_PRECALC_32_79_0_ssse3 for i386 (as W24/W28 are stored in memory, not in registers)
378 .macro W_PRECALC_32_79_0_i386_ssse3 // input arguments : W28,W8,W4,W
380 pxor $0, W_TMP // W28 ^ W32
381 xmov W_TMP, $3 // W = W28 ^ W32;
386 // this is a variant of W_PRECALC_32_79_0_nossse3 for i386 (as W24/W28 are stored in memory, not in registers)
387 .macro W_PRECALC_32_79_0_i386_nossse3 // input arguments : W28,W8,W4,W
389 pxor $0, W_TMP // W28 ^ W32
390 xmov W_TMP, $3 // W = W28 ^ W32
400 pxor $1, W_TMP // W_tmp = W6 ^ W16 ^ W28 ^ W32
401 xmov W_TMP, $1 // W = W_tmp = W6 ^ W16 ^ W28 ^ W32
407 por $0, W_TMP // W_tmp = (W6 ^ W16 ^ W28 ^ W32) rol 2
410 // this is a variant of W_PRECALC_32_79_2 for i386 (as W24/W28 are stored in memory, not in registers)
411 // this should be used when the input is either W24 or W28 on i386 architecture
417 por $0, W_TMP // W_tmp = (W6 ^ W16 ^ W28 ^ W32) rol 2
422 xmov W_TMP, $0 // W = (W6 ^ W16 ^ W28 ^ W32) rol 2
427 xmov W_TMP, $0 // W = (W6 ^ W16 ^ W28 ^ W32) rol 2
530 // i=0 : W28,W24,W20,W16,W12,W8,W4,W0
536 // i=4 : W24,W20,W16,W12,W8,W4,W0,W28
538 W_PRECALC_00_15_1 W28 // convert W_TMP to big-endian, and save W28 = W_TMP
539 W_PRECALC_00_15_2 // W_TMP = W28 + K
540 W_PRECALC_00_15_3 7 // 16(sp) = W_TMP = W28 + K
542 // i=8 : W20,W16,W12,W8,W4,W0,W28,W24
548 // i=12 : W16,W12,W8,W4,W0,W28,W24,W20
559 // i=16 : W12,W8,W4,W0,W28,W24,W20,W16
560 W_PRECALC_16_31_0 W0,W28,W24,W20,W16
569 // i=20 : W8,W4,W0,W28,W24,W20,W16,W12
570 W_PRECALC_16_31_0 W28,W24,W20,W16,W12
572 W_PRECALC_16_31_1 W28,W12
579 // i=24 : W4,W0,W28,W24,W20,W16,W12,W8
589 // i=28 : W0,W28,W24,W20,W16,W12,W8,W4
599 // i=32 : W28,W24,W20,W16,W12,W8,W4,W0
600 W_PRECALC_32_79_0 W28,W8,W4,W0
611 // i=36 : W24,W20,W16,W12,W8,W4,W0,W28
613 W_PRECALC_32_79_0 W24,W4,W0,W28
615 W_PRECALC_32_79_0_i386 W24,W4,W0,W28
618 W_PRECALC_32_79_1 W12,W28
621 W_PRECALC_32_79_2 W28
623 W_PRECALC_32_79_2_i386 W28
626 W_PRECALC_32_79_3 W28,22,16
629 // i=40 : W20,W16,W12,W8,W4,W0,W28,W24
633 W_PRECALC_32_79_0 W20,W0,W28,W24
635 W_PRECALC_32_79_0_i386 W20,W0,W28,W24
649 // i=44 : W16,W12,W8,W4,W0,W28,W24,W20
650 W_PRECALC_32_79_0 W16,W28,W24,W20
659 // i=48 : W12,W8,W4,W0,W28,W24,W20,W16
669 // i=52 : W8,W4,W0,W28,W24,W20,W16,W12
672 W_PRECALC_32_79_1 W28,W12
681 // i=56 : W4,W0,W28,W24,W20,W16,W12,W8
691 // i=60 : W0,W28,W24,W20,W16,W12,W8,W4
703 // i=64 : W28,W24,W20,W16,W12,W8,W4,W0
704 W_PRECALC_32_79_0 W28,W8,W4,W0
713 // i=68 : W24,W20,W16,W12,W8,W4,W0,W28
715 W_PRECALC_32_79_0 W24,W4,W0,W28
717 W_PRECALC_32_79_0_i386 W24,W4,W0,W28
720 W_PRECALC_32_79_1 W12,W28
723 W_PRECALC_32_79_2 W28
725 W_PRECALC_32_79_2_i386 W28
728 W_PRECALC_32_79_3 W28,54,K_XMM
731 // i=72 : W20,W16,W12,W8,W4,W0,W28,W24
733 W_PRECALC_32_79_0 W20,W0,W28,W24
735 W_PRECALC_32_79_0_i386 W20,W0,W28,W24
751 // i=76 : W16,W12,W8,W4,W0,W28,W24,W20
752 W_PRECALC_32_79_0 W16,W28,W24,W20
764 // i=0 : W28,W24,W20,W16,W12,W8,W4,W0
774 // i=4 : W24,W20,W16,W12,W8,W4,W0,W28
777 W_PRECALC_00_15_1 W28 // convert W_TMP to big-endian, and save W28 = W_TMP
779 W_PRECALC_00_15_2 // W_TMP = W28 + K
781 W_PRECALC_00_15_3 7 // 16(sp) = W_TMP = W28 + K[0]
784 // i=8 : W20,W16,W12,W8,W4,W0,W28,W24
794 // i=12 : W16,W12,W8,W4,W0,W28,W24,W20
835 // i=0 : W28,W24,W20,W16,W12,W8,W4,W0
841 // i=4 : W24,W20,W16,W12,W8,W4,W0,W28
843 W_PRECALC_00_15_1 W28 // convert W_TMP to big-endian, and save W28 = W_TMP
844 W_PRECALC_00_15_2 // W_TMP = W28 + K
845 W_PRECALC_00_15_3 7 // 16(sp) = W_TMP = W28 + K
847 // i=8 : W20,W16,W12,W8,W4,W0,W28,W24
853 // i=12 : W16,W12,W8,W4,W0,W28,W24,W20
864 // circular buffer : W12,W8,W4,W0,W28,W24,W20,W16
865 W_PRECALC_16_31_0 W0,W28,W24,W20,W16
875 // W8,W4,W0,W28,W24,W20,W16,W12
876 W_PRECALC_16_31_0 W28,W24,W20,W16,W12
878 W_PRECALC_16_31_1 W28,W12
887 // W4,W0,W28,W24,W20,W16,W12,W8
899 // W0,W28,W24,W20,W16,W12,W8,W4
911 // W28,W24,W20,W16,W12,W8,W4,W0
912 W_PRECALC_32_79_0 W28,W8,W4,W0
922 // W24,W20,W16,W12,W8,W4,W0,W28
924 W_PRECALC_32_79_0 W24,W4,W0,W28
926 W_PRECALC_32_79_0_i386 W24,W4,W0,W28
929 W_PRECALC_32_79_1 W12,W28
932 W_PRECALC_32_79_2 W28
934 W_PRECALC_32_79_2_i386 W28
937 W_PRECALC_32_79_3 W28,22,16
943 // W20,W16,W12,W8,W4,W0,W28,W24
945 W_PRECALC_32_79_0 W20,W0,W28,W24
947 W_PRECALC_32_79_0_i386 W20,W0,W28,W24
962 // W16,W12,W8,W4,W0,W28,W24,W20
963 W_PRECALC_32_79_0 W16,W28,W24,W20
973 // W12,W8,W4,W0,W28,W24,W20,W16
984 // W8,W4,W0,W28,W24,W20,W16,W12
987 W_PRECALC_32_79_1 W28,W12
995 // W4,W0,W28,W24,W20,W16,W12,W8
1008 // W0,W28,W24,W20,W16,W12,W8,W4
1019 // W28,W24,W20,W16,W12,W8,W4,W0
1020 W_PRECALC_32_79_0 W28,W8,W4,W0
1030 // W24,W20,W16,W12,W8,W4,W0,W28
1032 W_PRECALC_32_79_0 W24,W4,W0,W28
1034 W_PRECALC_32_79_0_i386 W24,W4,W0,W28
1037 W_PRECALC_32_79_1 W12,W28
1040 W_PRECALC_32_79_2 W28
1042 W_PRECALC_32_79_2_i386 W28
1045 W_PRECALC_32_79_3 W28,54,K_XMM
1049 // W20,W16,W12,W8,W4,W0,W28,W24
1051 W_PRECALC_32_79_0 W20,W0,W28,W24
1053 W_PRECALC_32_79_0_i386 W20,W0,W28,W24
1070 // W16,W12,W8,W4,W0,W28,W24,W20
1071 W_PRECALC_32_79_0 W16,W28,W24,W20
1083 // i=0 : W28,W24,W20,W16,W12,W8,W4,W0
1093 // i=4 : W24,W20,W16,W12,W8,W4,W0,W28
1096 W_PRECALC_00_15_1 W28 // convert W_TMP to big-endian, and save W28 = W_TMP
1098 W_PRECALC_00_15_2 // W_TMP = W28 + K
1100 W_PRECALC_00_15_3 7 // 16(sp) = W_TMP = W28 + K[0]
1103 // i=8 : W20,W16,W12,W8,W4,W0,W28,W24
1113 // i=12 : W16,W12,W8,W4,W0,W28,W24,W20