# linux_x86_32.s -- revision 1472:c18cbe5936b8
#
# Copyright (c) 2004, 2007, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

# IA-32 (Linux) runtime stubs, GAS/AT&T syntax, cdecl argument passing
# (all arguments on the stack; %eax/%ecx/%edx caller-saved).

        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        # Exported conjoint (memmove-style, overlap-safe) copy routines.
        .globl _Copy_conjoint_bytes
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _mmx_Copy_arrayof_conjoint_jshorts

        .globl _Atomic_cmpxchg_long

        .text

        # SafeFetch: the PFI/Resume labels are exported so the VM's signal
        # handler can recognize the faulting load and resume past it.
        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
        .globl SafeFetchN
        ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
        ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
        ## routine to vet the address.
        ## If the address is the faulting LD then
        ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
        .type    SafeFetch32,@function
        .p2align 4,,15
        # int SafeFetch32(int* adr, int errValue)
        # Preloads %eax with errValue, then loads *adr.  If the load faults,
        # execution is resumed at Fetch32Resume (presumably by the VM signal
        # handler keying off the exported labels -- see TODO above) with
        # %eax still holding errValue.
        # SafeFetchN shares the code: on 32-bit, intptr_t has the same size
        # and stack layout as int.
SafeFetch32:
SafeFetchN:
        movl     0x8(%esp), %eax      # %eax = errValue (default result)
        movl     0x4(%esp), %ecx      # %ecx = adr
Fetch32PFI:
        movl     (%ecx), %eax         # potentially faulting instruction
Fetch32Resume:
        ret


        .globl SpinPause
        .type    SpinPause,@function
        .p2align 4,,15
        # int SpinPause()
        # "rep; nop" is the encoding of the PAUSE spin-wait hint (F3 90),
        # safe on pre-SSE2 CPUs where it degrades to a plain NOP.
        # Always returns 1.
SpinPause:
        rep
        nop
        movl     $1, %eax
        ret

        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Overlap-safe (memmove-style): if to <= from, or to is beyond the
        # last source byte, copy ascending; otherwise copy descending so the
        # destination never overwrites source bytes not yet read.
        .p2align 4,,15
        .type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      cb_CopyRight         # to <= from: ascending copy is safe
        cmpl     %eax,%edi
        jbe      cb_CopyLeft          # ranges overlap: copy descending
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        # %edi is kept as (to - from) so (%edi,%esi,1) addresses the
        # destination while only %esi is advanced.
        subl     %esi,%edi
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # restore %edi = to-cursor
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            # byte count less prefix
        andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
5:      xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std                           # string ops now run descending; cld before ret
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
        andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
5:      subl     %esi,%edi
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore ascending direction for callers
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      acb_CopyRight        # to <= from: ascending copy is safe
        cmpl     %eax,%edi
        jbe      acb_CopyLeft         # ranges overlap: copy descending
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      movl     %ecx,%eax
        shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        ja       3f
        # copy aligned dwords
        # %edi holds (to - from) inside the loop; see conjoint_bytes.
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy aligned dwords
3:      rep;     smovl
4:      movl     %eax,%ecx
        andl     $3,%ecx              # suffix byte count
        jz       7f
        # copy suffix
5:      xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
acb_CopyLeft:
        std                           # descending string ops; cld before ret
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f
1:      shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        .=.+8                         # padding to keep loop head aligned
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx
        andl     $3,%ecx              # suffix byte count
        jz       7f
5:      subl     %esi,%edi
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore ascending direction
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # "atomic" here: each jshort moves with a single 16-bit (or wider)
        # access, never split into bytes.
        .p2align 4,,15
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight
        cmpl     %eax,%edi
        jbe      cs_CopyLeft
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
        subl     $1,%ecx
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Array elements are at least 2-byte aligned, so no prefix pass.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight
        cmpl     %eax,%edi
        jbe      acs_CopyLeft
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
        .=.+5                         # padding to keep loop head aligned
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        .p2align 4,,15
        .type    _Copy_conjoint_jints_atomic,@function
        .type    _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight
        cmpl     %eax,%edi
        jbe      ci_CopyLeft
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        .=.+10                        # padding to keep loop head aligned
2:      subl     %esi,%edi            # %edi = to - from; see conjoint_bytes
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        popl     %edi
        popl     %esi
        ret
ci_CopyLeft:
        std                           # descending string ops; cld before ret
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        .p2align 4,,15
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
        subl     $1,%ecx
        jnz      2b
        cld
        popl     %edi
        popl     %esi
        ret
3:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        #
        # if (from > to) {
        #   while (--count >= 0) {
        #     *to++ = *from++;
        #   }
        # } else {
        #   while (--count >= 0) {
        #     to[count] = from[count];
        #   }
        # }
        #
        # The x87 fildll/fistpll pair moves each jlong with one 64-bit load
        # and one 64-bit store, giving the single-access "atomic" copy that
        # plain 32-bit integer moves cannot.  (FP use here must match the
        # server compiler's -- see the NOTE at the top of this file.)
        .p2align 4,,15
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft
cla_CopyRight:
        subl     %eax,%edx            # %edx = to - from
        jmp      2f
        .p2align 4,,15
1:      fildll   (%eax)               # 64-bit load  (*from)
        fistpll  (%edx,%eax,1)        # 64-bit store (*to)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)        # from[count]
        fistpll  (%edx,%ecx,8)        # to[count]
cla_CopyLeft:
        subl     $1,%ecx
        jge      3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # MMX variant: bulk of the ascending copy uses 8 x movq (64 bytes
        # per iteration); emms is executed before any return so the x87
        # state is clean for callers.
        .p2align 4,,15
        .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft
mmx_acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        je       5f                   # 0 dwords: just the odd word, maybe
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: use the MMX loop
1:      subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      5f
3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx
4:      .p2align 4,,15
        # 16 dwords (64 bytes) per iteration via four rotating mm registers.
        movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b
        emms                          # leave MMX state before integer tail
        testl    %ecx,%ecx
        ja       1b                   # < 16 dwords remain: finish with movl loop
5:      andl     $1,%eax              # suffix word count
        je       7f
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
mmx_acs_CopyLeft:
        # Descending overlap case: plain integer copy (no MMX needed).
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix word count
        je       6f
        addl     $2,%esi
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld
        popl     %edi
        popl     %esi
        ret


        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
        #                                   volatile jlong* dest,
        #                                   jlong compare_value,
        #                                   bool is_MP)
        #
        # Returns the prior contents of *dest in %edx:%eax (the cdecl
        # 64-bit return registers), which cmpxchg8b leaves there.
        .p2align 4,,15
        .type    _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
                                      #  8(%esp) : return PC
        pushl    %ebx                 #  4(%esp) : old %ebx
        pushl    %edi                 #  0(%esp) : old %edi
        movl     12(%esp), %ebx       # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx       # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax       # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx       # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi       # 20(%esp) : dest
        cmpl     $0, 32(%esp)         # 32(%esp) : is_MP
        je       1f                   # uniprocessor: jump over the lock prefix
        lock
1:      cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret

        # NOTE(review): consider an explicit .section .note.GNU-stack,"",@progbits
        # so the linker does not mark the stack executable -- confirm against
        # the build's linker flags before adding.