# linux_x86_32.s revision 5776:de6a9e811145
#
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


# Exported entry points and section selection for the routines in this file.
#
        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.

        # Copy routines.
        .globl _Copy_conjoint_bytes
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _mmx_Copy_arrayof_conjoint_jshorts

        # 64-bit atomic helpers.
        .globl _Atomic_cmpxchg_long
        .globl _Atomic_move_long

        .text

# SpinPause: spin-wait hint.  Reads no arguments; returns 1 in %eax.
        .globl  SpinPause
        .type   SpinPause,@function
        .p2align 4,,15
SpinPause:
        rep                             # "rep; nop" is the encoding of PAUSE
        nop
        movl    $1, %eax                # return value: 1
        ret

# _Copy_conjoint_bytes: overlap-safe byte copy.
#
        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Arguments are read from the stack in the order from, to, count
        # (count is in bytes).  %esi and %edi are saved and restored;
        # %eax, %ecx, %edx and the flags are clobbered.
        #
        # The copy runs from low to high addresses unless
        # from < to <= from + count - 1, in which case it runs from high
        # to low with the direction flag set; the flag is cleared again
        # before returning.  The low-to-high path first copies single bytes
        # until the source is dword aligned, then whole dwords, then the
        # remaining 0..3 bytes.
        .p2align 4,,15
        .type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1 (leal leaves flags alone)
        jbe      cb_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cb_CopyLeft          # from < to <= from + count - 1
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        subl     %esi,%edi            # edi = to - from; (%edi,%esi) is the destination
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # back to an absolute destination pointer
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl                # string move: %ecx dwords, (%esi) -> (%edi)
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi            # edi = to - from, as in the prefix loop
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax            # eax = byte index into the suffix
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std                           # string moves now step downwards
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi
        addl     $3,%esi              # highest byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

# _Copy_arrayof_conjoint_bytes: overlap-safe byte copy without source alignment step.
#
        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
        #
        # Arguments are read from the stack in the order from, to, count
        # (count is in bytes).  %esi and %edi are saved and restored;
        # %eax, %ecx, %edx and the flags are clobbered.  The high-to-low
        # path sets the direction flag and clears it before returning.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1 (leal leaves flags alone)
        jbe      acb_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acb_CopyLeft         # from < to <= from + count - 1
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      movl     %ecx,%eax            # keep the byte count for the suffix
        shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy aligned dwords
        subl     %esi,%edi            # edi = to - from; (%edi,%esi) is the destination
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # back to an absolute destination pointer
        jmp      4f
        # copy aligned dwords
3:      rep;     smovl                # string move: %ecx dwords, (%esi) -> (%edi)
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # suffix byte count
        jz       7f
        # copy suffix
        xorl     %eax,%eax            # eax = byte index into the suffix
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
acb_CopyLeft:
        std                           # string moves now step downwards
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the suffix
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        .space 8                      # filler bytes, never executed (follows a jmp)
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # suffix byte count
        jz       7f
        subl     %esi,%edi
        addl     $3,%esi              # highest byte not yet copied
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

# _Copy_conjoint_jshorts_atomic: overlap-safe copy of 16-bit elements.
#
        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # Arguments are read from the stack in the order from, to, count
        # (count is in 16-bit words).  %esi and %edi are saved and restored;
        # %eax, %ecx, %edx and the flags are clobbered.
        #
        # Copies low to high unless from < to <= from + count*2 - 2, in
        # which case it copies high to low with the direction flag set
        # (cleared again before returning).  Data moves as whole dwords
        # plus at most one leading and one trailing 16-bit word.
        .p2align 4,,15
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cs_CopyLeft          # from < to <= from + count*2 - 2
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                   # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl                # string move: %ecx dwords, (%esi) -> (%edi)
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi            # edi = to - from; (%edi,%esi) is the destination
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # back to an absolute destination pointer
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std                           # string moves now step downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix test
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # step up to the one word left (lowest address)
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

# _Copy_arrayof_conjoint_jshorts: overlap-safe copy of 16-bit elements, no alignment prefix.
#
        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Arguments are read from the stack in the order from, to, count
        # (count is in 16-bit words).  %esi and %edi are saved and restored;
        # %eax, %ecx, %edx and the flags are clobbered.
        #
        # Copies low to high unless from < to <= from + count*2 - 2, in
        # which case it copies high to low with the direction flag set
        # (cleared again before returning).  Data moves as count/2 dwords
        # plus one 16-bit word when count is odd.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acs_CopyLeft         # from < to <= from + count*2 - 2
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl                # string move: %ecx dwords, (%esi) -> (%edi)
        jmp      4f
        # copy aligned dwords
        .space 5                      # filler bytes, never executed (follows a jmp)
2:      subl     %esi,%edi            # edi = to - from; (%edi,%esi) is the destination
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # back to an absolute destination pointer
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
acs_CopyLeft:
        std                           # string moves now step downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the word count for the suffix test
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # step up to the one word left (lowest address)
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

# _Copy_conjoint_jints_atomic / _Copy_arrayof_conjoint_jints: overlap-safe dword copy.
#
        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        #
        # Both symbols label the same code.  Arguments are read from the
        # stack in the order from, to, count (count is in 32-bit elements).
        # %esi and %edi are saved and restored; %eax, %ecx, %edx and the
        # flags are clobbered.
        #
        # Copies low to high unless from < to <= from + count*4 - 4, in
        # which case it copies high to low with the direction flag set
        # (cleared again before returning).  More than 32 elements go
        # through a rep string move, otherwise a load/store loop is used.
        .p2align 4,,15
        .type    _Copy_conjoint_jints_atomic,@function
        .type    _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      ci_CopyLeft          # from < to <= from + count*4 - 4
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl                # string move: %ecx dwords, (%esi) -> (%edi)
        popl     %edi
        popl     %esi
        ret
        .space 10                     # filler bytes, never executed (follows a ret)
2:      subl     %esi,%edi            # edi = to - from; (%edi,%esi) is the destination
        jmp      4f                   # test the count before the first move
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b                   # loop while the decremented count is >= 0
        popl     %edi
        popl     %esi
        ret
ci_CopyLeft:
        std                           # string moves now step downwards
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f                   # test the count before the first move
        .p2align 4,,15
2:      movl     (%eax),%edx          # %eax walks the source downwards
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b
        cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret

# _Copy_conjoint_jlongs_atomic: overlap-safe copy of 64-bit elements.
#
        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        #
        # if (from > to) {
        #   while (--count >= 0) {
        #     *to++ = *from++;
        #   }
        # } else {
        #   while (--count >= 0) {
        #     to[count] = from[count];
        #   }
        # }
        #
        # Arguments are read from the stack in the order from, to, count.
        # Each element is moved with one 64-bit x87 integer load (fildll)
        # and one 64-bit store-and-pop (fistpll), so the x87 stack depth
        # is unchanged on return.  Clobbers %eax, %ecx, %edx and the flags;
        # no registers are saved.
        .p2align 4,,15
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft         # to >= from: copy from the highest index down
cla_CopyRight:
        subl     %eax,%edx            # edx = to - from; (%edx,%eax) is the destination
        jmp      2f                   # test the count before the first move
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .p2align 4,,15
3:      fildll   (%eax,%ecx,8)        # from[count]
        fistpll  (%edx,%ecx,8)        # to[count]
cla_CopyLeft:
        subl     $1,%ecx
        jge      3b
        ret

# _mmx_Copy_arrayof_conjoint_jshorts: overlap-safe copy of 16-bit elements using MMX.
#
        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Arguments are read from the stack in the order from, to, count
        # (count is in 16-bit words).  %esi and %edi are saved and restored;
        # %eax, %ecx, %edx, the flags and, on the MMX path, %mm0-%mm2 are
        # clobbered.
        #
        # Low-to-high copies of 33 or more dwords move one dword with a
        # string move and then 64 bytes per iteration through %mm0-%mm2,
        # executing emms once that loop is done; leftover dwords use the
        # plain load/store loop.  The high-to-low path (taken when
        # from < to <= from + count*2 - 2) uses no MMX instructions; it
        # sets the direction flag and clears it before returning.
        .p2align 4,,15
        .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight    # to <= from
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft     # from < to <= from + count*2 - 2
mmx_acs_CopyRight:
        movl     %ecx,%eax            # word count, kept for the suffix test
        sarl     %ecx                 # dword count
        je       5f                   # no dwords to move
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: MMX path
1:      subl     %esi,%edi            # edi = to - from; (%edi,%esi) is the destination
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # back to an absolute destination pointer
        jmp      5f
3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx              # account for the dword just moved
4:      .p2align 4,,15
        movq     0(%esi),%mm0
        addl     $64,%edi             # stores below use negative offsets from here
        movq     8(%esi),%mm1
        subl     $16,%ecx             # 16 dwords (64 bytes) per iteration
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b                   # another full 16-dword chunk remains
        emms                          # leave MMX state
        testl    %ecx,%ecx
        ja       1b                   # leftover dwords: finish with the plain loop
5:      andl     $1,%eax              # suffix count
        je       7f                   # no suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
mmx_acs_CopyLeft:
        std                           # string moves now step downwards
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # word count, kept for the suffix test
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        je       6f                   # no suffix
        addl     $2,%esi              # step up to the one word left (lowest address)
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret


# _Atomic_cmpxchg_long: 64-bit compare-and-exchange built on cmpxchg8b.
#
        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
        #                                   volatile jlong* dest,
        #                                   jlong compare_value,
        #                                   bool is_MP)
        #
        # cmpxchg8b compares %edx:%eax (compare_value) with the 8 bytes at
        # (%edi); if they are equal it stores %ecx:%ebx (exchange_value)
        # there, otherwise it loads the memory value into %edx:%eax.  The
        # result is left in %edx:%eax.  The lock prefix is emitted only
        # when is_MP is non-zero.  %ebx and %edi are saved and restored;
        # %ecx and the flags are clobbered.
        #
        # NOTE(review): is_MP is tested as a full 32-bit stack word, which
        # assumes the caller widens the bool to 32 bits -- TODO confirm.
        .p2align 4,,15
        .type    _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
        je       1f                # is_MP == 0: jump over the lock prefix
        lock
1:      cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret


# _Atomic_move_long: copy one 64-bit value from *src to *dst.
#
        # Support for jlong Atomic::load and Atomic::store.
        # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
        #
        # The value is read with a single 64-bit x87 integer load (fildll)
        # and written with a single 64-bit store-and-pop (fistpll), so the
        # x87 stack depth is unchanged on return.  Clobbers %eax.
        .p2align 4,,15
        .type    _Atomic_move_long,@function
_Atomic_move_long:
        movl     4(%esp), %eax   # src
        fildll    (%eax)
        movl     8(%esp), %eax   # dest
        fistpll   (%eax)
        ret

