# linux_x86_32.s revision 4795:2cb5d5f6d5e5
#
# Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


25        # NOTE WELL!  The _Copy functions are called directly
26	# from server-compiler-generated code via CallLeafNoFP,
27	# which means that they *must* either not use floating
28	# point or use it in the same manner as does the server
29	# compiler.
30
31        .globl _Copy_conjoint_bytes
32        .globl _Copy_arrayof_conjoint_bytes
33        .globl _Copy_conjoint_jshorts_atomic
34	.globl _Copy_arrayof_conjoint_jshorts
35        .globl _Copy_conjoint_jints_atomic
36        .globl _Copy_arrayof_conjoint_jints
37	.globl _Copy_conjoint_jlongs_atomic
38	.globl _mmx_Copy_arrayof_conjoint_jshorts
39
40        .globl _Atomic_cmpxchg_long
41        .globl _Atomic_move_long
42
43	.text
44
45        .globl  SafeFetch32, Fetch32PFI, Fetch32Resume
46        .globl  SafeFetchN
47        ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
48        ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
49        ## routine to vet the address.  If the address is the faulting LD then
50        ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
51	.type    SafeFetch32,@function
52        .p2align 4,,15
53SafeFetch32:
54SafeFetchN:
55         movl    0x8(%esp), %eax
56         movl    0x4(%esp), %ecx
57Fetch32PFI:
58         movl    (%ecx), %eax
59Fetch32Resume:
60         ret
61
62
63        .globl  SpinPause
64	.type   SpinPause,@function
65        .p2align 4,,15
66SpinPause:
67        rep
68        nop
69        movl    $1, %eax
70        ret
71
72        # Support for void Copy::conjoint_bytes(void* from,
73        #                                       void* to,
74        #                                       size_t count)
75        .p2align 4,,15
76	.type    _Copy_conjoint_bytes,@function
77_Copy_conjoint_bytes:
78        pushl    %esi
79        movl     4+12(%esp),%ecx      # count
80        pushl    %edi
81        movl     8+ 4(%esp),%esi      # from
82        movl     8+ 8(%esp),%edi      # to
83        cmpl     %esi,%edi
84        leal     -1(%esi,%ecx),%eax   # from + count - 1
85        jbe      cb_CopyRight
86        cmpl     %eax,%edi
87        jbe      cb_CopyLeft
88        # copy from low to high
89cb_CopyRight:
90        cmpl     $3,%ecx
91        jbe      5f                   # <= 3 bytes
92        # align source address at dword address boundary
93        movl     %ecx,%eax            # original count
94        movl     $4,%ecx
95        subl     %esi,%ecx
96        andl     $3,%ecx              # prefix byte count
97        jz       1f                   # no prefix
98        subl     %ecx,%eax            # byte count less prefix
99        # copy prefix
100        subl     %esi,%edi
1010:      movb     (%esi),%dl
102        movb     %dl,(%edi,%esi,1)
103        addl     $1,%esi
104        subl     $1,%ecx
105        jnz      0b
106        addl     %esi,%edi
1071:      movl     %eax,%ecx            # byte count less prefix
108        shrl     $2,%ecx              # dword count
109        jz       4f                   # no dwords to move
110        cmpl     $32,%ecx
111        jbe      2f                   # <= 32 dwords
112        # copy aligned dwords
113        rep;     smovl
114        jmp      4f
115        # copy aligned dwords
1162:      subl     %esi,%edi
117        .p2align 4,,15
1183:      movl     (%esi),%edx
119        movl     %edx,(%edi,%esi,1)
120        addl     $4,%esi
121        subl     $1,%ecx
122        jnz      3b
123        addl     %esi,%edi
1244:      movl     %eax,%ecx            # byte count less prefix
1255:      andl     $3,%ecx              # suffix byte count
126        jz       7f                   # no suffix
127        # copy suffix
128        xorl     %eax,%eax
1296:      movb     (%esi,%eax,1),%dl
130        movb     %dl,(%edi,%eax,1)
131        addl     $1,%eax
132        subl     $1,%ecx
133        jnz      6b
1347:      popl     %edi
135        popl     %esi
136        ret
137        # copy from high to low
138cb_CopyLeft:
139        std
140        leal     -4(%edi,%ecx),%edi   # to + count - 4
141        movl     %eax,%esi            # from + count - 1
142        movl     %ecx,%eax
143        subl     $3,%esi              # from + count - 4
144        cmpl     $3,%ecx
145        jbe      5f                   # <= 3 bytes
1461:      shrl     $2,%ecx              # dword count
147        jz       4f                   # no dwords to move
148        cmpl     $32,%ecx
149        ja       3f                   # > 32 dwords
150        # copy dwords, aligned or not
151        subl     %esi,%edi
152        .p2align 4,,15
1532:      movl     (%esi),%edx
154        movl     %edx,(%edi,%esi,1)
155        subl     $4,%esi
156        subl     $1,%ecx
157        jnz      2b
158        addl     %esi,%edi
159        jmp      4f
160        # copy dwords, aligned or not
1613:      rep;     smovl
1624:      movl     %eax,%ecx            # byte count
1635:      andl     $3,%ecx              # suffix byte count
164        jz       7f                   # no suffix
165        # copy suffix
166        subl     %esi,%edi
167        addl     $3,%esi
1686:      movb     (%esi),%dl
169        movb     %dl,(%edi,%esi,1)
170	subl     $1,%esi
171        subl     $1,%ecx
172        jnz      6b
1737:      cld
174        popl     %edi
175        popl     %esi
176        ret
177
178        # Support for void Copy::arrayof_conjoint_bytes(void* from,
179        #                                               void* to,
180        #                                               size_t count)
181        #
182        # Same as _Copy_conjoint_bytes, except no source alignment check.
183        .p2align 4,,15
184	.type    _Copy_arrayof_conjoint_bytes,@function
185_Copy_arrayof_conjoint_bytes:
186        pushl    %esi
187        movl     4+12(%esp),%ecx      # count
188        pushl    %edi
189        movl     8+ 4(%esp),%esi      # from
190        movl     8+ 8(%esp),%edi      # to
191        cmpl     %esi,%edi
192        leal     -1(%esi,%ecx),%eax   # from + count - 1
193        jbe      acb_CopyRight
194        cmpl     %eax,%edi
195        jbe      acb_CopyLeft
196        # copy from low to high
197acb_CopyRight:
198        cmpl     $3,%ecx
199        jbe      5f
2001:      movl     %ecx,%eax
201        shrl     $2,%ecx
202        jz       4f
203        cmpl     $32,%ecx
204        ja       3f
205        # copy aligned dwords
206        subl     %esi,%edi
207        .p2align 4,,15
2082:      movl     (%esi),%edx
209        movl     %edx,(%edi,%esi,1)
210        addl     $4,%esi
211        subl     $1,%ecx
212        jnz      2b
213        addl     %esi,%edi
214        jmp      4f
215        # copy aligned dwords
2163:      rep;     smovl
2174:      movl     %eax,%ecx
2185:      andl     $3,%ecx
219        jz       7f
220        # copy suffix
221        xorl     %eax,%eax
2226:      movb     (%esi,%eax,1),%dl
223        movb     %dl,(%edi,%eax,1)
224        addl     $1,%eax
225        subl     $1,%ecx
226        jnz      6b
2277:      popl     %edi
228        popl     %esi
229        ret
230acb_CopyLeft:
231        std
232        leal     -4(%edi,%ecx),%edi   # to + count - 4
233        movl     %eax,%esi            # from + count - 1
234        movl     %ecx,%eax
235        subl     $3,%esi              # from + count - 4
236        cmpl     $3,%ecx
237        jbe      5f
2381:      shrl     $2,%ecx
239        jz       4f
240        cmpl     $32,%ecx
241        jbe      2f                   # <= 32 dwords
242        rep;     smovl
243        jmp      4f
244	.space 8
2452:      subl     %esi,%edi
246        .p2align 4,,15
2473:      movl     (%esi),%edx
248        movl     %edx,(%edi,%esi,1)
249        subl     $4,%esi
250        subl     $1,%ecx
251        jnz      3b
252        addl     %esi,%edi
2534:      movl     %eax,%ecx
2545:      andl     $3,%ecx
255        jz       7f
256        subl     %esi,%edi
257        addl     $3,%esi
2586:      movb     (%esi),%dl
259        movb     %dl,(%edi,%esi,1)
260	subl     $1,%esi
261        subl     $1,%ecx
262        jnz      6b
2637:      cld
264        popl     %edi
265        popl     %esi
266        ret
267
268        # Support for void Copy::conjoint_jshorts_atomic(void* from,
269        #                                                void* to,
270        #                                                size_t count)
271        .p2align 4,,15
272	.type    _Copy_conjoint_jshorts_atomic,@function
273_Copy_conjoint_jshorts_atomic:
274        pushl    %esi
275        movl     4+12(%esp),%ecx      # count
276        pushl    %edi
277        movl     8+ 4(%esp),%esi      # from
278        movl     8+ 8(%esp),%edi      # to
279        cmpl     %esi,%edi
280        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
281        jbe      cs_CopyRight
282        cmpl     %eax,%edi
283        jbe      cs_CopyLeft
284        # copy from low to high
285cs_CopyRight:
286        # align source address at dword address boundary
287        movl     %esi,%eax            # original from
288        andl     $3,%eax              # either 0 or 2
289        jz       1f                   # no prefix
290        # copy prefix
291        subl     $1,%ecx
292        jl       5f                   # zero count
293        movw     (%esi),%dx
294        movw     %dx,(%edi)
295        addl     %eax,%esi            # %eax == 2
296        addl     %eax,%edi
2971:      movl     %ecx,%eax            # word count less prefix
298        sarl     %ecx                 # dword count
299        jz       4f                   # no dwords to move
300        cmpl     $32,%ecx
301        jbe      2f                   # <= 32 dwords
302        # copy aligned dwords
303        rep;     smovl
304        jmp      4f
305        # copy aligned dwords
3062:      subl     %esi,%edi
307        .p2align 4,,15
3083:      movl     (%esi),%edx
309        movl     %edx,(%edi,%esi,1)
310        addl     $4,%esi
311        subl     $1,%ecx
312        jnz      3b
313        addl     %esi,%edi
3144:      andl     $1,%eax              # suffix count
315        jz       5f                   # no suffix
316        # copy suffix
317        movw     (%esi),%dx
318        movw     %dx,(%edi)
3195:      popl     %edi
320        popl     %esi
321        ret
322        # copy from high to low
323cs_CopyLeft:
324        std
325        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
326        movl     %eax,%esi            # from + count*2 - 2
327        movl     %ecx,%eax
328        subl     $2,%esi              # from + count*2 - 4
3291:      sarl     %ecx                 # dword count
330        jz       4f                   # no dwords to move
331        cmpl     $32,%ecx
332        ja       3f                   # > 32 dwords
333        subl     %esi,%edi
334        .p2align 4,,15
3352:      movl     (%esi),%edx
336        movl     %edx,(%edi,%esi,1)
337        subl     $4,%esi
338        subl     $1,%ecx
339        jnz      2b
340        addl     %esi,%edi
341        jmp      4f
3423:      rep;     smovl
3434:      andl     $1,%eax              # suffix count
344        jz       5f                   # no suffix
345        # copy suffix
346        addl     $2,%esi
347        addl     $2,%edi
348        movw     (%esi),%dx
349        movw     %dx,(%edi)
3505:      cld
351        popl     %edi
352        popl     %esi
353        ret
354
355        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
356        #                                                 void* to,
357        #                                                 size_t count)
358        .p2align 4,,15
359	.type    _Copy_arrayof_conjoint_jshorts,@function
360_Copy_arrayof_conjoint_jshorts:
361        pushl    %esi
362        movl     4+12(%esp),%ecx      # count
363        pushl    %edi
364        movl     8+ 4(%esp),%esi      # from
365        movl     8+ 8(%esp),%edi      # to
366        cmpl     %esi,%edi
367        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
368        jbe      acs_CopyRight
369        cmpl     %eax,%edi
370        jbe      acs_CopyLeft
371acs_CopyRight:
372        movl     %ecx,%eax            # word count
373        sarl     %ecx                 # dword count
374        jz       4f                   # no dwords to move
375        cmpl     $32,%ecx
376        jbe      2f                   # <= 32 dwords
377        # copy aligned dwords
378        rep;     smovl
379        jmp      4f
380        # copy aligned dwords
381        .space 5
3822:      subl     %esi,%edi
383        .p2align 4,,15
3843:      movl     (%esi),%edx
385        movl     %edx,(%edi,%esi,1)
386        addl     $4,%esi
387        subl     $1,%ecx
388        jnz      3b
389        addl     %esi,%edi
3904:      andl     $1,%eax              # suffix count
391        jz       5f                   # no suffix
392        # copy suffix
393        movw     (%esi),%dx
394        movw     %dx,(%edi)
3955:      popl     %edi
396        popl     %esi
397        ret
398acs_CopyLeft:
399        std
400        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
401        movl     %eax,%esi            # from + count*2 - 2
402        movl     %ecx,%eax
403        subl     $2,%esi              # from + count*2 - 4
404        sarl     %ecx                 # dword count
405        jz       4f                   # no dwords to move
406        cmpl     $32,%ecx
407        ja       3f                   # > 32 dwords
408        subl     %esi,%edi
409        .p2align 4,,15
4102:      movl     (%esi),%edx
411        movl     %edx,(%edi,%esi,1)
412        subl     $4,%esi
413        subl     $1,%ecx
414        jnz      2b
415        addl     %esi,%edi
416        jmp      4f
4173:      rep;     smovl
4184:      andl     $1,%eax              # suffix count
419        jz       5f                   # no suffix
420        # copy suffix
421        addl     $2,%esi
422        addl     $2,%edi
423        movw     (%esi),%dx
424        movw     %dx,(%edi)
4255:      cld
426        popl     %edi
427        popl     %esi
428        ret
429
430        # Support for void Copy::conjoint_jints_atomic(void* from,
431        #                                              void* to,
432        #                                              size_t count)
433        # Equivalent to
434        #   arrayof_conjoint_jints
435        .p2align 4,,15
436	.type    _Copy_conjoint_jints_atomic,@function
437	.type    _Copy_arrayof_conjoint_jints,@function
438_Copy_conjoint_jints_atomic:
439_Copy_arrayof_conjoint_jints:
440        pushl    %esi
441        movl     4+12(%esp),%ecx      # count
442        pushl    %edi
443        movl     8+ 4(%esp),%esi      # from
444        movl     8+ 8(%esp),%edi      # to
445        cmpl     %esi,%edi
446        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
447        jbe      ci_CopyRight
448        cmpl     %eax,%edi
449        jbe      ci_CopyLeft
450ci_CopyRight:
451        cmpl     $32,%ecx
452        jbe      2f                   # <= 32 dwords
453        rep;     smovl
454        popl     %edi
455        popl     %esi
456        ret
457        .space 10
4582:      subl     %esi,%edi
459        jmp      4f
460        .p2align 4,,15
4613:      movl     (%esi),%edx
462        movl     %edx,(%edi,%esi,1)
463        addl     $4,%esi
4644:      subl     $1,%ecx
465        jge      3b
466        popl     %edi
467        popl     %esi
468        ret
469ci_CopyLeft:
470        std
471        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
472        cmpl     $32,%ecx
473        ja       4f                   # > 32 dwords
474        subl     %eax,%edi            # eax == from + count*4 - 4
475        jmp      3f
476        .p2align 4,,15
4772:      movl     (%eax),%edx
478        movl     %edx,(%edi,%eax,1)
479        subl     $4,%eax
4803:      subl     $1,%ecx
481        jge      2b
482        cld
483        popl     %edi
484        popl     %esi
485        ret
4864:      movl     %eax,%esi            # from + count*4 - 4
487        rep;     smovl
488        cld
489        popl     %edi
490        popl     %esi
491        ret
492
493        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
494        #                                               jlong* to,
495        #                                               size_t count)
496        #
497        # 32-bit
498        #
499        # count treated as signed
500        #
501        # if (from > to) {
502        #   while (--count >= 0) {
503        #     *to++ = *from++;
504        #   }
505        # } else {
506        #   while (--count >= 0) {
507        #     to[count] = from[count];
508        #   }
509        # }
510        .p2align 4,,15
511	.type    _Copy_conjoint_jlongs_atomic,@function
512_Copy_conjoint_jlongs_atomic:
513        movl     4+8(%esp),%ecx       # count
514        movl     4+0(%esp),%eax       # from
515        movl     4+4(%esp),%edx       # to
516        cmpl     %eax,%edx
517        jae      cla_CopyLeft
518cla_CopyRight:
519        subl     %eax,%edx
520        jmp      2f
521        .p2align 4,,15
5221:      fildll   (%eax)
523        fistpll  (%edx,%eax,1)
524        addl     $8,%eax
5252:      subl     $1,%ecx
526        jge      1b
527        ret
528        .p2align 4,,15
5293:      fildll   (%eax,%ecx,8)
530        fistpll  (%edx,%ecx,8)
531cla_CopyLeft:
532        subl     $1,%ecx
533        jge      3b
534        ret
535
536        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
537        #                                                 void* to,
538        #                                                 size_t count)
539        .p2align 4,,15
540	.type    _mmx_Copy_arrayof_conjoint_jshorts,@function
541_mmx_Copy_arrayof_conjoint_jshorts:
542        pushl    %esi
543        movl     4+12(%esp),%ecx
544        pushl    %edi
545        movl     8+ 4(%esp),%esi
546        movl     8+ 8(%esp),%edi
547        cmpl     %esi,%edi
548        leal     -2(%esi,%ecx,2),%eax
549        jbe      mmx_acs_CopyRight
550        cmpl     %eax,%edi
551        jbe      mmx_acs_CopyLeft
552mmx_acs_CopyRight:
553        movl     %ecx,%eax
554        sarl     %ecx
555        je       5f
556        cmpl     $33,%ecx
557        jae      3f
5581:      subl     %esi,%edi
559        .p2align 4,,15
5602:      movl     (%esi),%edx
561        movl     %edx,(%edi,%esi,1)
562        addl     $4,%esi
563        subl     $1,%ecx
564        jnz      2b
565        addl     %esi,%edi
566        jmp      5f
5673:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
568        subl     $1,%ecx
5694:      .p2align 4,,15
570        movq     0(%esi),%mm0
571        addl     $64,%edi
572        movq     8(%esi),%mm1
573        subl     $16,%ecx
574        movq     16(%esi),%mm2
575        movq     %mm0,-64(%edi)
576        movq     24(%esi),%mm0
577        movq     %mm1,-56(%edi)
578        movq     32(%esi),%mm1
579        movq     %mm2,-48(%edi)
580        movq     40(%esi),%mm2
581        movq     %mm0,-40(%edi)
582        movq     48(%esi),%mm0
583        movq     %mm1,-32(%edi)
584        movq     56(%esi),%mm1
585        movq     %mm2,-24(%edi)
586        movq     %mm0,-16(%edi)
587        addl     $64,%esi
588        movq     %mm1,-8(%edi)
589        cmpl     $16,%ecx
590        jge      4b
591        emms
592	testl    %ecx,%ecx
593	ja       1b
5945:      andl     $1,%eax
595        je       7f
5966:      movw     (%esi),%dx
597        movw     %dx,(%edi)
5987:	popl     %edi
599        popl     %esi
600        ret
601mmx_acs_CopyLeft:
602        std
603        leal     -4(%edi,%ecx,2),%edi
604        movl     %eax,%esi
605        movl     %ecx,%eax
606        subl     $2,%esi
607        sarl     %ecx
608        je       4f
609        cmpl     $32,%ecx
610        ja       3f
611        subl     %esi,%edi
612        .p2align 4,,15
6132:      movl     (%esi),%edx
614        movl     %edx,(%edi,%esi,1)
615        subl     $4,%esi
616        subl     $1,%ecx
617        jnz      2b
618        addl     %esi,%edi
619        jmp      4f
6203:      rep;     smovl
6214:      andl     $1,%eax
622        je       6f
623        addl     $2,%esi
624        addl     $2,%edi
6255:      movw     (%esi),%dx
626        movw     %dx,(%edi)
6276:      cld
628        popl     %edi
629        popl     %esi
630        ret
631
632
633        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
634        #                                   volatile jlong* dest,
635        #                                   jlong compare_value,
636        #                                   bool is_MP)
637        #
638        .p2align 4,,15
639	.type    _Atomic_cmpxchg_long,@function
640_Atomic_cmpxchg_long:
641                                   #  8(%esp) : return PC
642        pushl    %ebx              #  4(%esp) : old %ebx
643        pushl    %edi              #  0(%esp) : old %edi
644        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
645        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
646        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
647        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
648        movl     20(%esp), %edi    # 20(%esp) : dest
649        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
650        je       1f
651        lock
6521:      cmpxchg8b (%edi)
653        popl     %edi
654        popl     %ebx
655        ret
656
657
658        # Support for jlong Atomic::load and Atomic::store.
659        # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
660        .p2align 4,,15
661	.type    _Atomic_move_long,@function
662_Atomic_move_long:
663        movl     4(%esp), %eax   # src
664        fildll    (%eax)
665        movl     8(%esp), %eax   # dest
666        fistpll   (%eax)
667        ret
668
669