# linux_x86_32.s revision 4795:2cb5d5f6d5e5
#
# Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


        # Syntax: GNU as, AT&T operand order (src, dst), 32-bit x86.
        # All routines take their arguments on the stack: on entry
        # 4(%esp) is the first argument, 8(%esp) the second, and so on.
        # Routines that use %esi, %edi or %ebx push them first and pop
        # them before returning; routines that set the direction flag
        # (std) clear it again (cld) before returning.

        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        .globl _Copy_conjoint_bytes
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _mmx_Copy_arrayof_conjoint_jshorts

        .globl _Atomic_cmpxchg_long
        .globl _Atomic_move_long

        .text

        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
        .globl SafeFetchN
        ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
        ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
        ## routine to vet the address.  If the address is the faulting LD then
        ## SafeFetchTriage() would return the resume-at EIP, otherwise null.

        # SafeFetch32 / SafeFetchN (same entry point on 32-bit):
        #   4(%esp) : address to load from
        #   8(%esp) : value preloaded into %eax before the load
        # Returns in %eax the dword at the address.  Fetch32PFI labels the
        # one instruction that may fault and Fetch32Resume the instruction
        # after it; both are exported so a fault at Fetch32PFI can be
        # recognised (see the TODO above).  Presumably the signal handler
        # resumes at Fetch32Resume so that the preloaded value is returned;
        # that handler is not part of this file.
        .type    SafeFetch32,@function
        .p2align 4,,15
SafeFetch32:
SafeFetchN:
        movl     0x8(%esp), %eax      # fallback value
        movl     0x4(%esp), %ecx      # address
Fetch32PFI:
        movl     (%ecx), %eax         # the potentially faulting load
Fetch32Resume:
        ret


        # SpinPause: executes "rep; nop" (the encoding of the PAUSE
        # spin-wait hint) and returns 1 in %eax.
        .globl SpinPause
        .type    SpinPause,@function
        .p2align 4,,15
SpinPause:
        rep
        nop
        movl     $1, %eax
        ret

        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Overlap-safe byte copy.  Direction is chosen with unsigned
        # compares: if to <= from, or to > from + count - 1, copy from low
        # to high addresses (cb_CopyRight); otherwise the regions overlap
        # with "to" above "from" and the copy runs from high to low
        # (cb_CopyLeft) with the direction flag set.
        # Clobbers %eax, %ecx, %edx and flags; %esi/%edi are preserved.
        .p2align 4,,15
        .type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1 (leal leaves flags intact)
        jbe      cb_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cb_CopyLeft          # from < to <= from + count - 1
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        subl     %esi,%edi            # %edi = to - from, so (%edi,%esi) tracks "to"
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # restore %edi to an absolute address
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi
        addl     $3,%esi              # last byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # leave the direction flag clear
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      acb_CopyRight
        cmpl     %eax,%edi
        jbe      acb_CopyLeft
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f
1:      movl     %ecx,%eax
        shrl     $2,%ecx
        jz       4f
        cmpl     $32,%ecx
        ja       3f
        # copy aligned dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy aligned dwords
3:      rep;     smovl
4:      movl     %eax,%ecx
5:      andl     $3,%ecx
        jz       7f
        # copy suffix
        xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
acb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f
1:      shrl     $2,%ecx
        jz       4f
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        .space 8
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx
5:      andl     $3,%ecx
        jz       7f
        subl     %esi,%edi
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # count is in 16-bit elements.  Elements are moved with 16-bit or
        # 32-bit accesses only.
        .p2align 4,,15
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight
        cmpl     %eax,%edi
        jbe      cs_CopyLeft
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                   # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Like _Copy_conjoint_jshorts_atomic, but without the leading
        # 16-bit prefix copy that aligns the source to a dword boundary.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight
        cmpl     %eax,%edi
        jbe      acs_CopyLeft
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
        .space 5
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        #
        # count is in 32-bit elements; every element is moved with a single
        # 32-bit access.  The short-copy loops are entered at their
        # decrement-and-test step (jge), so a count of zero copies nothing.
        .p2align 4,,15
        .type    _Copy_conjoint_jints_atomic,@function
        .type    _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight
        cmpl     %eax,%edi
        jbe      ci_CopyLeft
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        .space 10
2:      subl     %esi,%edi
        jmp      4f
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b
        popl     %edi
        popl     %esi
        ret
ci_CopyLeft:
        std
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f
        .p2align 4,,15
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b
        cld
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        #
        # if (from > to) {
        #   while (--count >= 0) {
        #     *to++ = *from++;
        #   }
        # } else {
        #   while (--count >= 0) {
        #     to[count] = from[count];
        #   }
        # }
        #
        # Each 64-bit element is moved with one x87 fildll/fistpll pair
        # (a single 8-byte load and a single 8-byte store).
        # Uses only %eax, %ecx, %edx and the x87 stack.
        .p2align 4,,15
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft         # to >= from: copy from the top down
cla_CopyRight:
        subl     %eax,%edx            # %edx = to - from
        jmp      2f
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)
        fistpll  (%edx,%ecx,8)
cla_CopyLeft:
        subl     $1,%ecx
        jge      3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # MMX variant.  When copying low-to-high with 33 or more dwords,
        # one dword is moved with smovl and the bulk is then moved 64 bytes
        # per iteration through %mm0-%mm2; emms is executed before the
        # remaining dwords and the trailing jshort are copied with integer
        # moves.  The high-to-low path does not use MMX.
        .p2align 4,,15
        .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count (jshorts)
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft
mmx_acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        je       5f                   # no dwords to move
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: MMX path
1:      subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      5f
3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx
4:      .p2align 4,,15
        movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx             # 16 dwords (64 bytes) per iteration
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b
        emms
        testl    %ecx,%ecx
        ja       1b                   # leftover dwords: finish with the dword loop
5:      andl     $1,%eax              # suffix count
        je       7f                   # no suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
mmx_acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f
        cmpl     $32,%ecx
        ja       3f
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax
        je       6f
        addl     $2,%esi
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld
        popl     %edi
        popl     %esi
        ret


        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
        #                                   volatile jlong* dest,
        #                                   jlong compare_value,
        #                                   bool is_MP)
        #
        # cmpxchg8b compares %edx:%eax with the 64-bit value at (%edi) and,
        # if equal, stores %ecx:%ebx there; %edx:%eax holds the previous
        # memory value afterwards, which is what is returned.  The lock
        # prefix is skipped when is_MP is zero.
        .p2align 4,,15
        .type    _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
        je       1f
        lock
1:      cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret


        # Support for jlong Atomic::load and Atomic::store.
        # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
        #
        # Moves the 64-bit value through the x87 stack so that it is read
        # with one 8-byte load and written with one 8-byte store.
        .p2align 4,,15
        .type    _Atomic_move_long,@function
_Atomic_move_long:
        movl     4(%esp), %eax   # src
        fildll    (%eax)
        movl     8(%esp), %eax   # dest
        fistpll   (%eax)
        ret
