# linux_x86_32.s revision 1949:0a8e0d4345b3
#
# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

        # ------------------------------------------------------------------
        # File conventions
        #
        #   Syntax : GNU as, AT&T operand order (source, destination).
        #   Target : 32-bit x86; every routine reads its arguments from the
        #            stack above the return address (4(%esp), 8(%esp), ...).
        #   Saved  : %esi, %edi and %ebx are pushed on entry and popped
        #            before returning by every routine that uses them.
        #   DF     : backward copies execute std and always execute cld
        #            before returning.
        #
        # Conjoint (possibly overlapping) copy routines all pick the copy
        # direction the same way, using unsigned comparisons:
        #
        #   to <= from                  -> copy low to high  (xx_CopyRight)
        #   from < to <= last element   -> copy high to low  (xx_CopyLeft)
        #   to beyond the last element  -> copy low to high  (xx_CopyRight)
        #
        # The leal between the first cmpl and its jbe computes the address
        # of the last source element; leal does not modify the flags, so
        # the jbe still tests the cmpl result.
        #
        # In the explicit dword/byte loops %edi temporarily holds the
        # displacement (to - from), so one pointer update advances both
        # the source and the destination address.
        # ------------------------------------------------------------------

        # NOTE WELL! The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        .globl _Copy_conjoint_bytes
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _mmx_Copy_arrayof_conjoint_jshorts

        .globl _Atomic_cmpxchg_long

        .text

        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
        .globl SafeFetchN
        ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
        ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
        ## routine to vet the address. If the address is the faulting LD then
        ## SafeFetchTriage() would return the resume-at EIP, otherwise null.

        # SafeFetch32 / SafeFetchN (both names share one entry point)
        #
        #   4(%esp) : address to load from
        #   8(%esp) : value preloaded into %eax before the load is attempted
        #
        # Returns in %eax the dword read from the address. Fetch32PFI marks
        # the one instruction that dereferences the address and
        # Fetch32Resume marks the ret that follows it.
        # NOTE(review): per the TODO above, a signal handler presumably
        # recognises a fault at Fetch32PFI and resumes at Fetch32Resume,
        # in which case %eax still holds the value from 8(%esp) -- confirm
        # against the handler.
        .type    SafeFetch32,@function
        .type    SafeFetchN,@function
        .p2align 4,,15
SafeFetch32:
SafeFetchN:
        movl     0x8(%esp), %eax      # value returned if the load does not complete
        movl     0x4(%esp), %ecx      # address to probe
Fetch32PFI:
        movl     (%ecx), %eax         # the potentially faulting load
Fetch32Resume:
        ret


        # SpinPause
        #
        # Executes "rep; nop" (the encoding of the PAUSE spin-wait hint)
        # and returns 1 in %eax.
        .globl   SpinPause
        .type    SpinPause,@function
        .p2align 4,,15
SpinPause:
        rep
        nop
        movl     $1, %eax
        ret

        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Register roles after the prologue:
        #   %esi = from, %edi = to, %ecx = count (bytes),
        #   %eax = from + count - 1 (address of the last source byte).
        #
        # Low-to-high path: copies a 0..3 byte prefix so that the source
        # becomes dword aligned, then whole dwords (explicit loop for
        # <= 32 dwords, rep smovl above that), then a 0..3 byte suffix.
        # High-to-low path: sets DF, copies whole dwords starting at the
        # top of the range without aligning, then the remaining 0..3 bytes.
        .p2align 4,,15
        .type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      cb_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cb_CopyLeft          # from < to <= from + count - 1
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        subl     %esi,%edi            # edi = to - from
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # edi = current destination again
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi = current destination again
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax            # eax = byte index within the suffix
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std                           # string ops now walk downwards
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi = current destination again
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi            # edi = to - from
        addl     $3,%esi              # highest byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore DF = 0 before returning
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
        #
        # Register roles after the prologue:
        #   %esi = from, %edi = to, %ecx = count (bytes),
        #   %eax = from + count - 1 (address of the last source byte).
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      acb_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acb_CopyLeft         # from < to <= from + count - 1
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      movl     %ecx,%eax            # keep the byte count for the suffix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy aligned dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi = current destination again
        jmp      4f
        # copy aligned dwords
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax            # eax = byte index within the suffix
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
acb_CopyLeft:
        std                           # string ops now walk downwards
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        # Advances the location counter by 8 bytes; never executed because
        # it follows an unconditional jmp.
        # NOTE(review): presumably hand-tuned padding that positions the
        # loop below -- confirm before changing.
        .=.+8
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi = current destination again
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        subl     %esi,%edi            # edi = to - from
        addl     $3,%esi              # highest byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore DF = 0 before returning
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # Register roles after the prologue:
        #   %esi = from, %edi = to, %ecx = count (16-bit elements),
        #   %eax = from + count*2 - 2 (address of the last source element).
        #
        # Low-to-high path: copies one leading element when the source is
        # not dword aligned, then element pairs as dwords, then one
        # trailing element when the remaining count is odd.
        .p2align 4,,15
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cs_CopyLeft          # from < to <= from + count*2 - 2
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                   # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi = current destination again
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std                           # string ops now walk downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the element count for the suffix
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi = current destination again
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # step back up to the one element left
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore DF = 0 before returning
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Register roles after the prologue:
        #   %esi = from, %edi = to, %ecx = count (16-bit elements),
        #   %eax = from + count*2 - 2 (address of the last source element).
        #
        # Unlike _Copy_conjoint_jshorts_atomic there is no source alignment
        # prefix: element pairs are copied as dwords straight away, followed
        # by one trailing element when count is odd.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acs_CopyLeft         # from < to <= from + count*2 - 2
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
        # Advances the location counter by 5 bytes; never executed because
        # it follows an unconditional jmp.
        # NOTE(review): presumably hand-tuned padding that positions the
        # loop below -- confirm before changing.
        .=.+5
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi = current destination again
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
acs_CopyLeft:
        std                           # string ops now walk downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the element count for the suffix
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi = current destination again
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # step back up to the one element left
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore DF = 0 before returning
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        #
        # Register roles after the prologue:
        #   %esi = from, %edi = to, %ecx = count (32-bit elements),
        #   %eax = from + count*4 - 4 (address of the last source element).
        #
        # The explicit loops are entered at their decrement, so a count of
        # zero falls straight through (subl $1 then jge is a signed test).
        .p2align 4,,15
        .type    _Copy_conjoint_jints_atomic,@function
        .type    _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      ci_CopyLeft          # from < to <= from + count*4 - 4
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        # Advances the location counter by 10 bytes; never executed because
        # it follows a ret.
        # NOTE(review): presumably hand-tuned padding that positions the
        # loop below -- confirm before changing.
        .=.+10
2:      subl     %esi,%edi            # edi = to - from
        jmp      4f                   # enter the loop at its count test
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b
        popl     %edi
        popl     %esi
        ret
ci_CopyLeft:
        std                           # string ops now walk downwards
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f                   # enter the loop at its count test
        .p2align 4,,15
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b
        cld                           # restore DF = 0 before returning
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld                           # restore DF = 0 before returning
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        #
        # if (from > to) {
        #   while (--count >= 0) {
        #     *to++ = *from++;
        #   }
        # } else {
        #   while (--count >= 0) {
        #     to[count] = from[count];
        #   }
        # }
        #
        # Register roles: %eax = from, %edx = to, %ecx = count.
        # Each 64-bit element is moved with one x87 fildll / fistpll pair,
        # i.e. one 8-byte load and one 8-byte store. No callee-saved
        # register is used, so there is no prologue or epilogue.
        .p2align 4,,15
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft         # to >= from
cla_CopyRight:
        subl     %eax,%edx            # edx = to - from
        jmp      2f                   # enter the loop at its count test
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)        # from[count]
        fistpll  (%edx,%ecx,8)        # to[count]
cla_CopyLeft:
        subl     $1,%ecx
        jge      3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # MMX variant of _Copy_arrayof_conjoint_jshorts.
        #
        # Register roles after the prologue:
        #   %esi = from, %edi = to, %ecx = count (16-bit elements),
        #   %eax = from + count*2 - 2 (address of the last source element).
        #
        # Low-to-high path: fewer than 33 dwords use the plain dword loop.
        # Otherwise one dword is copied with smovl, then 64-byte chunks
        # (16 dwords) go through %mm0-%mm2 while at least 16 dwords remain;
        # emms is executed before any other exit path, and the leftover
        # dwords are handed back to the plain dword loop.
        # High-to-low path: identical to acs_CopyLeft (no MMX).
        .p2align 4,,15
        .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight    # to <= from
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft     # from < to <= from + count*2 - 2
mmx_acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        je       5f                   # no dwords to move
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: use the MMX loop
1:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi = current destination again
        jmp      5f
3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx              # account for the dword just copied
        # The alignment directive precedes the label so that the jge back
        # edge lands on the first movq rather than on the padding.
        .p2align 4,,15
4:      movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx             # 16 dwords per iteration
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b                   # at least one more full chunk
        emms                          # leave MMX state before x87 can be used
        testl    %ecx,%ecx
        ja       1b                   # testl clears CF, so this is "nonzero"
5:      andl     $1,%eax              # suffix count
        je       7f                   # no suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
mmx_acs_CopyLeft:
        std                           # string ops now walk downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the element count for the suffix
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi = current destination again
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        je       6f                   # no suffix
        addl     $2,%esi              # step back up to the one element left
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld                           # restore DF = 0 before returning
        popl     %edi
        popl     %esi
        ret


        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
        #                                   volatile jlong* dest,
        #                                   jlong compare_value,
        #                                   bool is_MP)
        #
        # cmpxchg8b compares %edx:%eax with the 8 bytes at (%edi) and, when
        # equal, stores %ecx:%ebx there; otherwise it loads the memory value
        # into %edx:%eax. Either way %edx:%eax is what this routine returns.
        # The lock prefix is executed only when is_MP is nonzero: the je
        # jumps over it, directly onto the cmpxchg8b opcode.
        .p2align 4,,15
        .type    _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
        je       1f
        lock
1:      cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret
