• Home
  • History
  • Annotate
  • Raw
  • Download
  • only in /netgear-R7000-V1.0.7.12_1.2.5/ap/gpl/openssl-1.0.2h/crypto/aes/asm/

Lines Matching refs:QWP

115     &$movekey		($rndkey0,&QWP(0,$key));
116 &$movekey ($rndkey1,&QWP(16,$key));
124 &$movekey ($rndkey1,&QWP(0,$key));
134 &movups ($rndkey0,&QWP(0,$key));
135 &$movekey ($rndkey1,&QWP(0x10,$key));
137 &$movekey ($rndkey0,&QWP(0x20,$key));
145 &$movekey ($rndkey1,&QWP(-0x40,$key));
147 &$movekey ($rndkey0,&QWP(-0x30,$key));
150 &$movekey ($rndkey1,&QWP(-0x20,$key));
152 &$movekey ($rndkey0,&QWP(-0x10,$key));
155 &$movekey ($rndkey1,&QWP(0,$key));
157 &$movekey ($rndkey0,&QWP(0x10,$key));
159 &$movekey ($rndkey1,&QWP(0x20,$key));
161 &$movekey ($rndkey0,&QWP(0x30,$key));
163 &$movekey ($rndkey1,&QWP(0x40,$key));
165 &$movekey ($rndkey0,&QWP(0x50,$key));
167 &$movekey ($rndkey1,&QWP(0x60,$key));
169 &$movekey ($rndkey0,&QWP(0x70,$key));
182 &movups ($inout0,&QWP(0,"eax"));
191 &movups (&QWP(0,"eax"),$inout0);
201 &movups ($inout0,&QWP(0,"eax"));
210 &movups (&QWP(0,"eax"),$inout0);
234 &$movekey ($rndkey0,&QWP(0,$key));
236 &$movekey ($rndkey1,&QWP(16,$key));
239 &$movekey ($rndkey0,&QWP(32,$key));
247 &$movekey ($rndkey1,&QWP(0,$key,$rounds));
251 &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
265 &$movekey ($rndkey0,&QWP(0,$key));
267 &$movekey ($rndkey1,&QWP(16,$key));
271 &$movekey ($rndkey0,&QWP(32,$key));
280 &$movekey ($rndkey1,&QWP(0,$key,$rounds));
285 &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
305 &$movekey ($rndkey0,&QWP(0,$key));
306 &$movekey ($rndkey1,&QWP(16,$key));
312 &$movekey ($rndkey0,&QWP(32,$key));
323 &$movekey ($rndkey1,&QWP(0,$key,$rounds));
329 &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
349 &$movekey ($rndkey0,&QWP(0,$key));
351 &$movekey ($rndkey1,&QWP(16,$key));
363 &$movekey ($rndkey0,&QWP(0,$key,$rounds));
376 &$movekey ($rndkey1,&QWP(0,$key,$rounds));
384 &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
434 &movdqu ($inout0,&QWP(0,$inp));
435 &movdqu ($inout1,&QWP(0x10,$inp));
436 &movdqu ($inout2,&QWP(0x20,$inp));
437 &movdqu ($inout3,&QWP(0x30,$inp));
438 &movdqu ($inout4,&QWP(0x40,$inp));
439 &movdqu ($inout5,&QWP(0x50,$inp));
445 &movups (&QWP(0,$out),$inout0);
446 &movdqu ($inout0,&QWP(0,$inp));
447 &movups (&QWP(0x10,$out),$inout1);
448 &movdqu ($inout1,&QWP(0x10,$inp));
449 &movups (&QWP(0x20,$out),$inout2);
450 &movdqu ($inout2,&QWP(0x20,$inp));
451 &movups (&QWP(0x30,$out),$inout3);
452 &movdqu ($inout3,&QWP(0x30,$inp));
453 &movups (&QWP(0x40,$out),$inout4);
454 &movdqu ($inout4,&QWP(0x40,$inp));
455 &movups (&QWP(0x50,$out),$inout5);
457 &movdqu ($inout5,&QWP(0x50,$inp));
468 &movups (&QWP(0,$out),$inout0);
469 &movups (&QWP(0x10,$out),$inout1);
470 &movups (&QWP(0x20,$out),$inout2);
471 &movups (&QWP(0x30,$out),$inout3);
472 &movups (&QWP(0x40,$out),$inout4);
473 &movups (&QWP(0x50,$out),$inout5);
479 &movups ($inout0,&QWP(0,$inp));
482 &movups ($inout1,&QWP(0x10,$inp));
484 &movups ($inout2,&QWP(0x20,$inp));
487 &movups ($inout3,&QWP(0x30,$inp));
489 &movups ($inout4,&QWP(0x40,$inp));
492 &movups (&QWP(0,$out),$inout0);
493 &movups (&QWP(0x10,$out),$inout1);
494 &movups (&QWP(0x20,$out),$inout2);
495 &movups (&QWP(0x30,$out),$inout3);
496 &movups (&QWP(0x40,$out),$inout4);
504 &movups (&QWP(0,$out),$inout0);
509 &movups (&QWP(0,$out),$inout0);
510 &movups (&QWP(0x10,$out),$inout1);
515 &movups (&QWP(0,$out),$inout0);
516 &movups (&QWP(0x10,$out),$inout1);
517 &movups (&QWP(0x20,$out),$inout2);
522 &movups (&QWP(0,$out),$inout0);
523 &movups (&QWP(0x10,$out),$inout1);
524 &movups (&QWP(0x20,$out),$inout2);
525 &movups (&QWP(0x30,$out),$inout3);
534 &movdqu ($inout0,&QWP(0,$inp));
535 &movdqu ($inout1,&QWP(0x10,$inp));
536 &movdqu ($inout2,&QWP(0x20,$inp));
537 &movdqu ($inout3,&QWP(0x30,$inp));
538 &movdqu ($inout4,&QWP(0x40,$inp));
539 &movdqu ($inout5,&QWP(0x50,$inp));
545 &movups (&QWP(0,$out),$inout0);
546 &movdqu ($inout0,&QWP(0,$inp));
547 &movups (&QWP(0x10,$out),$inout1);
548 &movdqu ($inout1,&QWP(0x10,$inp));
549 &movups (&QWP(0x20,$out),$inout2);
550 &movdqu ($inout2,&QWP(0x20,$inp));
551 &movups (&QWP(0x30,$out),$inout3);
552 &movdqu ($inout3,&QWP(0x30,$inp));
553 &movups (&QWP(0x40,$out),$inout4);
554 &movdqu ($inout4,&QWP(0x40,$inp));
555 &movups (&QWP(0x50,$out),$inout5);
557 &movdqu ($inout5,&QWP(0x50,$inp));
568 &movups (&QWP(0,$out),$inout0);
569 &movups (&QWP(0x10,$out),$inout1);
570 &movups (&QWP(0x20,$out),$inout2);
571 &movups (&QWP(0x30,$out),$inout3);
572 &movups (&QWP(0x40,$out),$inout4);
573 &movups (&QWP(0x50,$out),$inout5);
579 &movups ($inout0,&QWP(0,$inp));
582 &movups ($inout1,&QWP(0x10,$inp));
584 &movups ($inout2,&QWP(0x20,$inp));
587 &movups ($inout3,&QWP(0x30,$inp));
589 &movups ($inout4,&QWP(0x40,$inp));
592 &movups (&QWP(0,$out),$inout0);
593 &movups (&QWP(0x10,$out),$inout1);
594 &movups (&QWP(0x20,$out),$inout2);
595 &movups (&QWP(0x30,$out),$inout3);
596 &movups (&QWP(0x40,$out),$inout4);
604 &movups (&QWP(0,$out),$inout0);
609 &movups (&QWP(0,$out),$inout0);
610 &movups (&QWP(0x10,$out),$inout1);
615 &movups (&QWP(0,$out),$inout0);
616 &movups (&QWP(0x10,$out),$inout1);
617 &movups (&QWP(0x20,$out),$inout2);
622 &movups (&QWP(0,$out),$inout0);
623 &movups (&QWP(0x10,$out),$inout1);
624 &movups (&QWP(0x20,$out),$inout2);
625 &movups (&QWP(0x30,$out),$inout3);
661 &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
662 &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
682 &movdqa ($inout3,&QWP(0,"esp"));
689 &$movekey ($rndkey0,&QWP(0,$key_));
691 &movups ($in0,&QWP(0,$inp));
694 &$movekey ($rndkey1,&QWP(16,$key_));
697 &$movekey ($rndkey0,&QWP(32,$key_));
702 &$movekey ($rndkey1,&QWP(0,$key,$rounds));
706 &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
710 &paddq ($ivec,&QWP(16,"esp"));
718 &movups (&QWP(0,$out),$in0); # save output
725 &movups (&QWP(0,$out),$cmac);
749 &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
750 &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
767 &movdqa ($inout3,&QWP(0,"esp")); # bswap mask
780 &movups ($in0,&QWP(0,$inp)); # load inp
781 &paddq ($ivec,&QWP(16,"esp"));
782 &lea ($inp,&QWP(16,$inp));
791 &movups (&QWP(0,$out),$in0); # save output
798 &$movekey ($rndkey0,&QWP(0,$key_));
800 &$movekey ($rndkey1,&QWP(16,$key_));
804 &$movekey ($rndkey0,&QWP(32,$key_));
809 &$movekey ($rndkey1,&QWP(0,$key,$rounds));
813 &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
815 &movups ($in0,&QWP(0,$inp)); # load inp
816 &paddq ($ivec,&QWP(16,"esp"));
821 &lea ($inp,&QWP(16,$inp));
834 &movups (&QWP(0,$out),$cmac);
878 &movdqu ($inout5,&QWP(0,$rounds_)); # load ivec
903 &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask
915 &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet
917 &movdqu ($inout4,&QWP(0,$key)); # key[0]
918 &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet
928 &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec^key[0]
938 &movdqa ($rndkey0,&QWP(32,"esp")); # pull counter-less ivec
944 &$movekey ($rndkey1,&QWP(16,$key_));
951 &$movekey ($rndkey0,&QWP(32,$key_));
960 &movups ($rndkey1,&QWP(0,$inp));
961 &movups ($rndkey0,&QWP(0x10,$inp));
963 &movups ($rndkey1,&QWP(0x20,$inp));
965 &movups (&QWP(0,$out),$inout0);
966 &movdqa ($rndkey0,&QWP(16,"esp")); # load increment
968 &movdqa ($rndkey1,&QWP(64,"esp")); # load 2nd triplet
969 &movups (&QWP(0x10,$out),$inout1);
970 &movups (&QWP(0x20,$out),$inout2);
973 &paddd ($rndkey0,&QWP(48,"esp")); # 1st triplet increment
974 &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask
976 &movups ($inout1,&QWP(0x30,$inp));
977 &movups ($inout2,&QWP(0x40,$inp));
979 &movups ($inout1,&QWP(0x50,$inp));
981 &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet
984 &movups (&QWP(0x30,$out),$inout3);
986 &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet
988 &movups (&QWP(0x40,$out),$inout4);
990 &movups (&QWP(0x50,$out),$inout5);
999 &movdqu ($inout5,&QWP(0,$key_));
1001 &pxor ($inout5,&QWP(32,"esp")); # restore count-less ivec
1024 &movups ($rndkey1,&QWP(0,$inp));
1025 &movups ($rndkey0,&QWP(0x10,$inp));
1027 &movups ($rndkey1,&QWP(0x20,$inp));
1029 &movups ($rndkey0,&QWP(0x30,$inp));
1031 &movups ($rndkey1,&QWP(0x40,$inp));
1033 &movups (&QWP(0,$out),$inout0);
1035 &movups (&QWP(0x10,$out),$inout1);
1036 &movups (&QWP(0x20,$out),$inout2);
1037 &movups (&QWP(0x30,$out),$inout3);
1038 &movups (&QWP(0x40,$out),$inout4);
1042 &movups ($inout0,&QWP(0,$rounds_)); # load ivec
1050 &movups ($in0,&QWP(0,$inp));
1052 &movups (&QWP(0,$out),$in0);
1057 &movups ($inout3,&QWP(0,$inp));
1058 &movups ($inout4,&QWP(0x10,$inp));
1061 &movups (&QWP(0,$out),$inout0);
1062 &movups (&QWP(0x10,$out),$inout1);
1067 &movups ($inout3,&QWP(0,$inp));
1068 &movups ($inout4,&QWP(0x10,$inp));
1070 &movups ($inout5,&QWP(0x20,$inp));
1072 &movups (&QWP(0,$out),$inout0);
1074 &movups (&QWP(0x10,$out),$inout1);
1075 &movups (&QWP(0x20,$out),$inout2);
1080 &movups ($inout4,&QWP(0,$inp));
1081 &movups ($inout5,&QWP(0x10,$inp));
1082 &movups ($rndkey1,&QWP(0x20,$inp));
1084 &movups ($rndkey0,&QWP(0x30,$inp));
1086 &movups (&QWP(0,$out),$inout0);
1088 &movups (&QWP(0x10,$out),$inout1);
1090 &movups (&QWP(0x20,$out),$inout2);
1091 &movups (&QWP(0x30,$out),$inout3);
1099 &movdqa (&QWP(32,"esp"),"xmm0"); # clear stack
1101 &movdqa (&QWP(48,"esp"),"xmm0");
1103 &movdqa (&QWP(64,"esp"),"xmm0");
1121 &movups ($inout0,&QWP(0,$inp));
1146 &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87
1165 &movdqa (&QWP(16*$i,"esp"),$tweak);
1172 &movdqa (&QWP(16*$i++,"esp"),$tweak);
1174 &$movekey ($rndkey0,&QWP(0,$key_));
1176 &movups ($inout0,&QWP(0,$inp)); # load input
1181 &movdqu ($inout1,&QWP(16*1,$inp));
1183 &movdqu ($inout2,&QWP(16*2,$inp));
1185 &movdqu ($inout3,&QWP(16*3,$inp));
1187 &movdqu ($inout4,&QWP(16*4,$inp));
1189 &movdqu ($rndkey1,&QWP(16*5,$inp));
1192 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1193 &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak
1196 &$movekey ($rndkey1,&QWP(16,$key_));
1197 &pxor ($inout1,&QWP(16*1,"esp"));
1198 &pxor ($inout2,&QWP(16*2,"esp"));
1200 &pxor ($inout3,&QWP(16*3,"esp"));
1201 &pxor ($inout4,&QWP(16*4,"esp"));
1204 &$movekey ($rndkey0,&QWP(32,$key_));
1211 &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak
1213 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1215 &xorps ($inout1,&QWP(16*1,"esp"));
1216 &movups (&QWP(16*0,$out),$inout0); # write output
1217 &xorps ($inout2,&QWP(16*2,"esp"));
1218 &movups (&QWP(16*1,$out),$inout1);
1219 &xorps ($inout3,&QWP(16*3,"esp"));
1220 &movups (&QWP(16*2,$out),$inout2);
1221 &xorps ($inout4,&QWP(16*4,"esp"));
1222 &movups (&QWP(16*3,$out),$inout3);
1224 &movups (&QWP(16*4,$out),$inout4);
1226 &movups (&QWP(16*5,$out),$inout5);
1228 &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87
1276 &movdqa (&QWP(16*0,"esp"),$inout3);
1277 &movdqa (&QWP(16*1,"esp"),$inout4);
1280 &movdqa (&QWP(16*2,"esp"),$inout5);
1282 &movdqa (&QWP(16*3,"esp"),$tweak);
1287 &movdqu ($inout0,&QWP(16*0,$inp)); # load input
1288 &movdqu ($inout1,&QWP(16*1,$inp));
1289 &movdqu ($inout2,&QWP(16*2,$inp));
1290 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1291 &movdqu ($inout3,&QWP(16*3,$inp));
1292 &pxor ($inout1,&QWP(16*1,"esp"));
1293 &movdqu ($inout4,&QWP(16*4,$inp));
1294 &pxor ($inout2,&QWP(16*2,"esp"));
1296 &pxor ($inout3,&QWP(16*3,"esp"));
1297 &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak
1302 &movaps ($tweak,&QWP(16*4,"esp")); # last tweak
1303 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1304 &xorps ($inout1,&QWP(16*1,"esp"));
1305 &xorps ($inout2,&QWP(16*2,"esp"));
1306 &movups (&QWP(16*0,$out),$inout0); # write output
1307 &xorps ($inout3,&QWP(16*3,"esp"));
1308 &movups (&QWP(16*1,$out),$inout1);
1310 &movups (&QWP(16*2,$out),$inout2);
1311 &movups (&QWP(16*3,$out),$inout3);
1312 &movups (&QWP(16*4,$out),$inout4);
1317 &movups ($inout0,&QWP(16*0,$inp)); # load input
1325 &movups (&QWP(16*0,$out),$inout0); # write output
1334 &movups ($inout0,&QWP(16*0,$inp)); # load input
1335 &movups ($inout1,&QWP(16*1,$inp));
1344 &movups (&QWP(16*0,$out),$inout0); # write output
1345 &movups (&QWP(16*1,$out),$inout1);
1353 &movups ($inout0,&QWP(16*0,$inp)); # load input
1354 &movups ($inout1,&QWP(16*1,$inp));
1355 &movups ($inout2,&QWP(16*2,$inp));
1366 &movups (&QWP(16*0,$out),$inout0); # write output
1367 &movups (&QWP(16*1,$out),$inout1);
1368 &movups (&QWP(16*2,$out),$inout2);
1377 &movups ($inout0,&QWP(16*0,$inp)); # load input
1378 &movups ($inout1,&QWP(16*1,$inp));
1379 &movups ($inout2,&QWP(16*2,$inp));
1380 &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak
1381 &movups ($inout3,&QWP(16*3,$inp));
1383 &xorps ($inout1,&QWP(16*1,"esp"));
1389 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1390 &xorps ($inout1,&QWP(16*1,"esp"));
1392 &movups (&QWP(16*0,$out),$inout0); # write output
1394 &movups (&QWP(16*1,$out),$inout1);
1395 &movups (&QWP(16*2,$out),$inout2);
1396 &movups (&QWP(16*3,$out),$inout3);
1420 &pand ($inout3,&QWP(16*6,"esp")); # isolate carry and residue
1437 &movups ($inout0,&QWP(-16,$out)); # load input
1444 &movups (&QWP(-16,$out),$inout0); # write output
1450 &movdqa (&QWP(16*0,"esp"),"xmm0"); # clear stack
1452 &movdqa (&QWP(16*1,"esp"),"xmm0");
1454 &movdqa (&QWP(16*2,"esp"),"xmm0");
1456 &movdqa (&QWP(16*3,"esp"),"xmm0");
1458 &movdqa (&QWP(16*4,"esp"),"xmm0");
1460 &movdqa (&QWP(16*5,"esp"),"xmm0");
1469 &movups ($inout0,&QWP(0,$inp));
1503 &movdqa ($twmask,&QWP(6*16,"esp")); # 0x0...010...87
1520 &movdqa (&QWP(16*$i,"esp"),$tweak);
1527 &movdqa (&QWP(16*$i++,"esp"),$tweak);
1529 &$movekey ($rndkey0,&QWP(0,$key_));
1531 &movups ($inout0,&QWP(0,$inp)); # load input
1536 &movdqu ($inout1,&QWP(16*1,$inp));
1538 &movdqu ($inout2,&QWP(16*2,$inp));
1540 &movdqu ($inout3,&QWP(16*3,$inp));
1542 &movdqu ($inout4,&QWP(16*4,$inp));
1544 &movdqu ($rndkey1,&QWP(16*5,$inp));
1547 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1548 &movdqa (&QWP(16*$i,"esp"),$inout5); # save last tweak
1551 &$movekey ($rndkey1,&QWP(16,$key_));
1552 &pxor ($inout1,&QWP(16*1,"esp"));
1553 &pxor ($inout2,&QWP(16*2,"esp"));
1555 &pxor ($inout3,&QWP(16*3,"esp"));
1556 &pxor ($inout4,&QWP(16*4,"esp"));
1559 &$movekey ($rndkey0,&QWP(32,$key_));
1566 &movdqa ($tweak,&QWP(16*5,"esp")); # last tweak
1568 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1570 &xorps ($inout1,&QWP(16*1,"esp"));
1571 &movups (&QWP(16*0,$out),$inout0); # write output
1572 &xorps ($inout2,&QWP(16*2,"esp"));
1573 &movups (&QWP(16*1,$out),$inout1);
1574 &xorps ($inout3,&QWP(16*3,"esp"));
1575 &movups (&QWP(16*2,$out),$inout2);
1576 &xorps ($inout4,&QWP(16*4,"esp"));
1577 &movups (&QWP(16*3,$out),$inout3);
1579 &movups (&QWP(16*4,$out),$inout4);
1581 &movups (&QWP(16*5,$out),$inout5);
1583 &movdqa ($twmask,&QWP(16*6,"esp")); # 0x0...010...87
1631 &movdqa (&QWP(16*0,"esp"),$inout3);
1632 &movdqa (&QWP(16*1,"esp"),$inout4);
1635 &movdqa (&QWP(16*2,"esp"),$inout5);
1637 &movdqa (&QWP(16*3,"esp"),$tweak);
1642 &movdqu ($inout0,&QWP(16*0,$inp)); # load input
1643 &movdqu ($inout1,&QWP(16*1,$inp));
1644 &movdqu ($inout2,&QWP(16*2,$inp));
1645 &pxor ($inout0,&QWP(16*0,"esp")); # input^=tweak
1646 &movdqu ($inout3,&QWP(16*3,$inp));
1647 &pxor ($inout1,&QWP(16*1,"esp"));
1648 &movdqu ($inout4,&QWP(16*4,$inp));
1649 &pxor ($inout2,&QWP(16*2,"esp"));
1651 &pxor ($inout3,&QWP(16*3,"esp"));
1652 &movdqa (&QWP(16*4,"esp"),$inout5); # save last tweak
1657 &movaps ($tweak,&QWP(16*4,"esp")); # last tweak
1658 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1659 &xorps ($inout1,&QWP(16*1,"esp"));
1660 &xorps ($inout2,&QWP(16*2,"esp"));
1661 &movups (&QWP(16*0,$out),$inout0); # write output
1662 &xorps ($inout3,&QWP(16*3,"esp"));
1663 &movups (&QWP(16*1,$out),$inout1);
1665 &movups (&QWP(16*2,$out),$inout2);
1666 &movups (&QWP(16*3,$out),$inout3);
1667 &movups (&QWP(16*4,$out),$inout4);
1672 &movups ($inout0,&QWP(16*0,$inp)); # load input
1680 &movups (&QWP(16*0,$out),$inout0); # write output
1689 &movups ($inout0,&QWP(16*0,$inp)); # load input
1690 &movups ($inout1,&QWP(16*1,$inp));
1699 &movups (&QWP(16*0,$out),$inout0); # write output
1700 &movups (&QWP(16*1,$out),$inout1);
1708 &movups ($inout0,&QWP(16*0,$inp)); # load input
1709 &movups ($inout1,&QWP(16*1,$inp));
1710 &movups ($inout2,&QWP(16*2,$inp));
1721 &movups (&QWP(16*0,$out),$inout0); # write output
1722 &movups (&QWP(16*1,$out),$inout1);
1723 &movups (&QWP(16*2,$out),$inout2);
1732 &movups ($inout0,&QWP(16*0,$inp)); # load input
1733 &movups ($inout1,&QWP(16*1,$inp));
1734 &movups ($inout2,&QWP(16*2,$inp));
1735 &xorps ($inout0,&QWP(16*0,"esp")); # input^=tweak
1736 &movups ($inout3,&QWP(16*3,$inp));
1738 &xorps ($inout1,&QWP(16*1,"esp"));
1744 &xorps ($inout0,&QWP(16*0,"esp")); # output^=tweak
1745 &xorps ($inout1,&QWP(16*1,"esp"));
1747 &movups (&QWP(16*0,$out),$inout0); # write output
1749 &movups (&QWP(16*1,$out),$inout1);
1750 &movups (&QWP(16*2,$out),$inout2);
1751 &movups (&QWP(16*3,$out),$inout3);
1774 &movdqa ($twmask,&QWP(16*6,"esp"));
1790 &movups ($inout0,&QWP(0,$inp)); # load input
1797 &movups (&QWP(0,$out),$inout0); # write output
1813 &movups ($inout0,&QWP(0,$out)); # load input
1820 &movups (&QWP(0,$out),$inout0); # write output
1826 &movdqa (&QWP(16*0,"esp"),"xmm0"); # clear stack
1828 &movdqa (&QWP(16*1,"esp"),"xmm0");
1830 &movdqa (&QWP(16*2,"esp"),"xmm0");
1832 &movdqa (&QWP(16*3,"esp"),"xmm0");
1834 &movdqa (&QWP(16*4,"esp"),"xmm0");
1836 &movdqa (&QWP(16*5,"esp"),"xmm0");
1861 &movups ($ivec,&QWP(0,$key_)); # load IV
1875 &movups ($ivec,&QWP(0,$inp)); # input actually
1883 &movups (&QWP(0,$out),$inout0); # store output
1909 &movaps (&QWP(0,"esp"),$ivec); # save IV
1914 &movaps (&QWP(0,"esp"),$rndkey0); # save IV
1915 &movups (&QWP(0,$out),$inout5);
1918 &movdqu ($inout0,&QWP(0,$inp));
1919 &movdqu ($inout1,&QWP(0x10,$inp));
1920 &movdqu ($inout2,&QWP(0x20,$inp));
1921 &movdqu ($inout3,&QWP(0x30,$inp));
1922 &movdqu ($inout4,&QWP(0x40,$inp));
1923 &movdqu ($inout5,&QWP(0x50,$inp));
1927 &movups ($rndkey1,&QWP(0,$inp));
1928 &movups ($rndkey0,&QWP(0x10,$inp));
1929 &xorps ($inout0,&QWP(0,"esp")); # ^=IV
1931 &movups ($rndkey1,&QWP(0x20,$inp));
1933 &movups ($rndkey0,&QWP(0x30,$inp));
1935 &movups ($rndkey1,&QWP(0x40,$inp));
1937 &movups ($rndkey0,&QWP(0x50,$inp)); # IV
1939 &movups (&QWP(0,$out),$inout0);
1940 &movups (&QWP(0x10,$out),$inout1);
1942 &movups (&QWP(0x20,$out),$inout2);
1944 &movups (&QWP(0x30,$out),$inout3);
1946 &movups (&QWP(0x40,$out),$inout4);
1955 &movups (&QWP(0,$out),$inout0);
1958 &movups ($inout0,&QWP(0,$inp));
1963 &movups ($inout1,&QWP(0x10,$inp));
1968 &movups ($inout2,&QWP(0x20,$inp));
1972 &movups ($inout3,&QWP(0x30,$inp));
1976 &movups ($inout4,&QWP(0x40,$inp));
1977 &movaps (&QWP(0,"esp"),$ivec); # save IV
1978 &movups ($inout0,&QWP(0,$inp));
1981 &movups ($rndkey1,&QWP(0,$inp));
1982 &movups ($rndkey0,&QWP(0x10,$inp));
1983 &xorps ($inout0,&QWP(0,"esp")); # ^= IV
1985 &movups ($rndkey1,&QWP(0x20,$inp));
1987 &movups ($rndkey0,&QWP(0x30,$inp));
1989 &movups ($ivec,&QWP(0x40,$inp)); # IV
1991 &movups (&QWP(0,$out),$inout0);
1992 &movups (&QWP(0x10,$out),$inout1);
1994 &movups (&QWP(0x20,$out),$inout2);
1996 &movups (&QWP(0x30,$out),$inout3);
2018 &movups (&QWP(0,$out),$inout0);
2031 &movups (&QWP(0,$out),$inout0);
2034 &movups (&QWP(0x10,$out),$inout1);
2037 &movups ($ivec,&QWP(0x20,$inp));
2043 &movups ($rndkey1,&QWP(0x10,$inp));
2044 &movups ($rndkey0,&QWP(0x20,$inp));
2046 &movups ($ivec,&QWP(0x30,$inp));
2048 &movups (&QWP(0,$out),$inout0);
2050 &movups (&QWP(0x10,$out),$inout1);
2053 &movups (&QWP(0x20,$out),$inout2);
2069 &movups (&QWP(0,$out),$inout0);
2074 &movaps (&QWP(0,"esp"),$inout0);
2080 &movdqa (&QWP(0,"esp"),$inout0);
2087 &movups (&QWP(0,$key_),$ivec); # output IV
2119 &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey
2136 &$movekey (&QWP(-16,$key),"xmm0"); # round 0
2157 &$movekey (&QWP(0,$key),"xmm0");
2163 &$movekey (&QWP(0,$key),"xmm0");
2175 &movdqa ("xmm5",&QWP(0x00,"ebx"));
2177 &movdqa ("xmm4",&QWP(0x20,"ebx"));
2179 &movdqu (&QWP(-16,$key),"xmm0");
2196 &movdqu (&QWP(-16,$key),"xmm0");
2202 &movdqa ("xmm4",&QWP(0x30,"ebx"));
2217 &movdqu (&QWP(0,$key),"xmm0");
2232 &movdqu (&QWP(16,$key),"xmm0");
2240 &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey
2245 &$movekey (&QWP(-16,$key),"xmm0"); # round 0
2262 &$movekey (&QWP(0,$key),"xmm0");
2268 &$movekey (&QWP(0,$key),"xmm0");
2289 &$movekey (&QWP(0,$key),"xmm5");
2291 &$movekey (&QWP(16,$key),"xmm3");
2296 &movdqa ("xmm5",&QWP(0x10,"ebx"));
2297 &movdqa ("xmm4",&QWP(0x20,"ebx"));
2299 &movdqu (&QWP(-16,$key),"xmm0");
2302 &movq (&QWP(0,$key),"xmm2");
2324 &movdqu (&QWP(-16,$key),"xmm0");
2335 &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey
2341 &$movekey (&QWP(-32,$key),"xmm0"); # round 0
2342 &$movekey (&QWP(-16,$key),"xmm2"); # round 1
2369 &$movekey (&QWP(0,$key),"xmm0");
2376 &$movekey (&QWP(0,$key),"xmm2");
2388 &$movekey (&QWP(0,$key),"xmm0");
2400 &movdqa ("xmm5",&QWP(0x00,"ebx"));
2401 &movdqa ("xmm4",&QWP(0x20,"ebx"));
2403 &movdqu (&QWP(-32,$key),"xmm0");
2405 &movdqu (&QWP(-16,$key),"xmm2");
2421 &movdqu (&QWP(0,$key),"xmm0");
2439 &movdqu (&QWP(16,$key),"xmm2");
2496 &$movekey ("xmm0",&QWP(0,$key)); # just swap
2497 &$movekey ("xmm1",&QWP(0,"eax"));
2498 &$movekey (&QWP(0,"eax"),"xmm0");
2499 &$movekey (&QWP(0,$key),"xmm1");
2504 &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse
2505 &$movekey ("xmm1",&QWP(0,"eax"));
2510 &$movekey (&QWP(16,"eax"),"xmm0");
2511 &$movekey (&QWP(-16,$key),"xmm1");
2515 &$movekey ("xmm0",&QWP(0,$key)); # inverse middle
2517 &$movekey (&QWP(0,$key),"xmm0");