# linux_x86_32.s revision 1949:0a8e0d4345b3
#
# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


25        # NOTE WELL!  The _Copy functions are called directly
26	# from server-compiler-generated code via CallLeafNoFP,
27	# which means that they *must* either not use floating
28	# point or use it in the same manner as does the server
29	# compiler.
30
31        .globl _Copy_conjoint_bytes
32        .globl _Copy_arrayof_conjoint_bytes
33        .globl _Copy_conjoint_jshorts_atomic
34	.globl _Copy_arrayof_conjoint_jshorts
35        .globl _Copy_conjoint_jints_atomic
36        .globl _Copy_arrayof_conjoint_jints
37	.globl _Copy_conjoint_jlongs_atomic
38	.globl _mmx_Copy_arrayof_conjoint_jshorts
39
40        .globl _Atomic_cmpxchg_long
41
42	.text
43
44        .globl  SafeFetch32, Fetch32PFI, Fetch32Resume
45        .globl  SafeFetchN
46        ## TODO: avoid exposing Fetch32PFI and Fetch32Resume.
47        ## Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
48        ## routine to vet the address.  If the address is the faulting LD then
49        ## SafeFetchTriage() would return the resume-at EIP, otherwise null.
50	.type    SafeFetch32,@function
51        .p2align 4,,15
52SafeFetch32:
53SafeFetchN:
54         movl    0x8(%esp), %eax
55         movl    0x4(%esp), %ecx
56Fetch32PFI:
57         movl    (%ecx), %eax
58Fetch32Resume:
59         ret
60
61
62        .globl  SpinPause
63	.type   SpinPause,@function
64        .p2align 4,,15
65SpinPause:
66        rep
67        nop
68        movl    $1, %eax
69        ret
70
71        # Support for void Copy::conjoint_bytes(void* from,
72        #                                       void* to,
73        #                                       size_t count)
74        .p2align 4,,15
75	.type    _Copy_conjoint_bytes,@function
76_Copy_conjoint_bytes:
77        pushl    %esi
78        movl     4+12(%esp),%ecx      # count
79        pushl    %edi
80        movl     8+ 4(%esp),%esi      # from
81        movl     8+ 8(%esp),%edi      # to
82        cmpl     %esi,%edi
83        leal     -1(%esi,%ecx),%eax   # from + count - 1
84        jbe      cb_CopyRight
85        cmpl     %eax,%edi
86        jbe      cb_CopyLeft
87        # copy from low to high
88cb_CopyRight:
89        cmpl     $3,%ecx
90        jbe      5f                   # <= 3 bytes
91        # align source address at dword address boundary
92        movl     %ecx,%eax            # original count
93        movl     $4,%ecx
94        subl     %esi,%ecx
95        andl     $3,%ecx              # prefix byte count
96        jz       1f                   # no prefix
97        subl     %ecx,%eax            # byte count less prefix
98        # copy prefix
99        subl     %esi,%edi
1000:      movb     (%esi),%dl
101        movb     %dl,(%edi,%esi,1)
102        addl     $1,%esi
103        subl     $1,%ecx
104        jnz      0b
105        addl     %esi,%edi
1061:      movl     %eax,%ecx            # byte count less prefix
107        shrl     $2,%ecx              # dword count
108        jz       4f                   # no dwords to move
109        cmpl     $32,%ecx
110        jbe      2f                   # <= 32 dwords
111        # copy aligned dwords
112        rep;     smovl
113        jmp      4f
114        # copy aligned dwords
1152:      subl     %esi,%edi
116        .p2align 4,,15
1173:      movl     (%esi),%edx
118        movl     %edx,(%edi,%esi,1)
119        addl     $4,%esi
120        subl     $1,%ecx
121        jnz      3b
122        addl     %esi,%edi
1234:      movl     %eax,%ecx            # byte count less prefix
1245:      andl     $3,%ecx              # suffix byte count
125        jz       7f                   # no suffix
126        # copy suffix
127        xorl     %eax,%eax
1286:      movb     (%esi,%eax,1),%dl
129        movb     %dl,(%edi,%eax,1)
130        addl     $1,%eax
131        subl     $1,%ecx
132        jnz      6b
1337:      popl     %edi
134        popl     %esi
135        ret
136        # copy from high to low
137cb_CopyLeft:
138        std
139        leal     -4(%edi,%ecx),%edi   # to + count - 4
140        movl     %eax,%esi            # from + count - 1
141        movl     %ecx,%eax
142        subl     $3,%esi              # from + count - 4
143        cmpl     $3,%ecx
144        jbe      5f                   # <= 3 bytes
1451:      shrl     $2,%ecx              # dword count
146        jz       4f                   # no dwords to move
147        cmpl     $32,%ecx
148        ja       3f                   # > 32 dwords
149        # copy dwords, aligned or not
150        subl     %esi,%edi
151        .p2align 4,,15
1522:      movl     (%esi),%edx
153        movl     %edx,(%edi,%esi,1)
154        subl     $4,%esi
155        subl     $1,%ecx
156        jnz      2b
157        addl     %esi,%edi
158        jmp      4f
159        # copy dwords, aligned or not
1603:      rep;     smovl
1614:      movl     %eax,%ecx            # byte count
1625:      andl     $3,%ecx              # suffix byte count
163        jz       7f                   # no suffix
164        # copy suffix
165        subl     %esi,%edi
166        addl     $3,%esi
1676:      movb     (%esi),%dl
168        movb     %dl,(%edi,%esi,1)
169	subl     $1,%esi
170        subl     $1,%ecx
171        jnz      6b
1727:      cld
173        popl     %edi
174        popl     %esi
175        ret
176
177        # Support for void Copy::arrayof_conjoint_bytes(void* from,
178        #                                               void* to,
179        #                                               size_t count)
180        #
181        # Same as _Copy_conjoint_bytes, except no source alignment check.
182        .p2align 4,,15
183	.type    _Copy_arrayof_conjoint_bytes,@function
184_Copy_arrayof_conjoint_bytes:
185        pushl    %esi
186        movl     4+12(%esp),%ecx      # count
187        pushl    %edi
188        movl     8+ 4(%esp),%esi      # from
189        movl     8+ 8(%esp),%edi      # to
190        cmpl     %esi,%edi
191        leal     -1(%esi,%ecx),%eax   # from + count - 1
192        jbe      acb_CopyRight
193        cmpl     %eax,%edi
194        jbe      acb_CopyLeft
195        # copy from low to high
196acb_CopyRight:
197        cmpl     $3,%ecx
198        jbe      5f
1991:      movl     %ecx,%eax
200        shrl     $2,%ecx
201        jz       4f
202        cmpl     $32,%ecx
203        ja       3f
204        # copy aligned dwords
205        subl     %esi,%edi
206        .p2align 4,,15
2072:      movl     (%esi),%edx
208        movl     %edx,(%edi,%esi,1)
209        addl     $4,%esi
210        subl     $1,%ecx
211        jnz      2b
212        addl     %esi,%edi
213        jmp      4f
214        # copy aligned dwords
2153:      rep;     smovl
2164:      movl     %eax,%ecx
2175:      andl     $3,%ecx
218        jz       7f
219        # copy suffix
220        xorl     %eax,%eax
2216:      movb     (%esi,%eax,1),%dl
222        movb     %dl,(%edi,%eax,1)
223        addl     $1,%eax
224        subl     $1,%ecx
225        jnz      6b
2267:      popl     %edi
227        popl     %esi
228        ret
229acb_CopyLeft:
230        std
231        leal     -4(%edi,%ecx),%edi   # to + count - 4
232        movl     %eax,%esi            # from + count - 1
233        movl     %ecx,%eax
234        subl     $3,%esi              # from + count - 4
235        cmpl     $3,%ecx
236        jbe      5f
2371:      shrl     $2,%ecx
238        jz       4f
239        cmpl     $32,%ecx
240        jbe      2f                   # <= 32 dwords
241        rep;     smovl
242        jmp      4f
243	.=.+8
2442:      subl     %esi,%edi
245        .p2align 4,,15
2463:      movl     (%esi),%edx
247        movl     %edx,(%edi,%esi,1)
248        subl     $4,%esi
249        subl     $1,%ecx
250        jnz      3b
251        addl     %esi,%edi
2524:      movl     %eax,%ecx
2535:      andl     $3,%ecx
254        jz       7f
255        subl     %esi,%edi
256        addl     $3,%esi
2576:      movb     (%esi),%dl
258        movb     %dl,(%edi,%esi,1)
259	subl     $1,%esi
260        subl     $1,%ecx
261        jnz      6b
2627:      cld
263        popl     %edi
264        popl     %esi
265        ret
266
267        # Support for void Copy::conjoint_jshorts_atomic(void* from,
268        #                                                void* to,
269        #                                                size_t count)
270        .p2align 4,,15
271	.type    _Copy_conjoint_jshorts_atomic,@function
272_Copy_conjoint_jshorts_atomic:
273        pushl    %esi
274        movl     4+12(%esp),%ecx      # count
275        pushl    %edi
276        movl     8+ 4(%esp),%esi      # from
277        movl     8+ 8(%esp),%edi      # to
278        cmpl     %esi,%edi
279        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
280        jbe      cs_CopyRight
281        cmpl     %eax,%edi
282        jbe      cs_CopyLeft
283        # copy from low to high
284cs_CopyRight:
285        # align source address at dword address boundary
286        movl     %esi,%eax            # original from
287        andl     $3,%eax              # either 0 or 2
288        jz       1f                   # no prefix
289        # copy prefix
290        subl     $1,%ecx
291        jl       5f                   # zero count
292        movw     (%esi),%dx
293        movw     %dx,(%edi)
294        addl     %eax,%esi            # %eax == 2
295        addl     %eax,%edi
2961:      movl     %ecx,%eax            # word count less prefix
297        sarl     %ecx                 # dword count
298        jz       4f                   # no dwords to move
299        cmpl     $32,%ecx
300        jbe      2f                   # <= 32 dwords
301        # copy aligned dwords
302        rep;     smovl
303        jmp      4f
304        # copy aligned dwords
3052:      subl     %esi,%edi
306        .p2align 4,,15
3073:      movl     (%esi),%edx
308        movl     %edx,(%edi,%esi,1)
309        addl     $4,%esi
310        subl     $1,%ecx
311        jnz      3b
312        addl     %esi,%edi
3134:      andl     $1,%eax              # suffix count
314        jz       5f                   # no suffix
315        # copy suffix
316        movw     (%esi),%dx
317        movw     %dx,(%edi)
3185:      popl     %edi
319        popl     %esi
320        ret
321        # copy from high to low
322cs_CopyLeft:
323        std
324        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
325        movl     %eax,%esi            # from + count*2 - 2
326        movl     %ecx,%eax
327        subl     $2,%esi              # from + count*2 - 4
3281:      sarl     %ecx                 # dword count
329        jz       4f                   # no dwords to move
330        cmpl     $32,%ecx
331        ja       3f                   # > 32 dwords
332        subl     %esi,%edi
333        .p2align 4,,15
3342:      movl     (%esi),%edx
335        movl     %edx,(%edi,%esi,1)
336        subl     $4,%esi
337        subl     $1,%ecx
338        jnz      2b
339        addl     %esi,%edi
340        jmp      4f
3413:      rep;     smovl
3424:      andl     $1,%eax              # suffix count
343        jz       5f                   # no suffix
344        # copy suffix
345        addl     $2,%esi
346        addl     $2,%edi
347        movw     (%esi),%dx
348        movw     %dx,(%edi)
3495:      cld
350        popl     %edi
351        popl     %esi
352        ret
353
354        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
355        #                                                 void* to,
356        #                                                 size_t count)
357        .p2align 4,,15
358	.type    _Copy_arrayof_conjoint_jshorts,@function
359_Copy_arrayof_conjoint_jshorts:
360        pushl    %esi
361        movl     4+12(%esp),%ecx      # count
362        pushl    %edi
363        movl     8+ 4(%esp),%esi      # from
364        movl     8+ 8(%esp),%edi      # to
365        cmpl     %esi,%edi
366        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
367        jbe      acs_CopyRight
368        cmpl     %eax,%edi
369        jbe      acs_CopyLeft
370acs_CopyRight:
371        movl     %ecx,%eax            # word count
372        sarl     %ecx                 # dword count
373        jz       4f                   # no dwords to move
374        cmpl     $32,%ecx
375        jbe      2f                   # <= 32 dwords
376        # copy aligned dwords
377        rep;     smovl
378        jmp      4f
379        # copy aligned dwords
380        .=.+5
3812:      subl     %esi,%edi
382        .p2align 4,,15
3833:      movl     (%esi),%edx
384        movl     %edx,(%edi,%esi,1)
385        addl     $4,%esi
386        subl     $1,%ecx
387        jnz      3b
388        addl     %esi,%edi
3894:      andl     $1,%eax              # suffix count
390        jz       5f                   # no suffix
391        # copy suffix
392        movw     (%esi),%dx
393        movw     %dx,(%edi)
3945:      popl     %edi
395        popl     %esi
396        ret
397acs_CopyLeft:
398        std
399        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
400        movl     %eax,%esi            # from + count*2 - 2
401        movl     %ecx,%eax
402        subl     $2,%esi              # from + count*2 - 4
403        sarl     %ecx                 # dword count
404        jz       4f                   # no dwords to move
405        cmpl     $32,%ecx
406        ja       3f                   # > 32 dwords
407        subl     %esi,%edi
408        .p2align 4,,15
4092:      movl     (%esi),%edx
410        movl     %edx,(%edi,%esi,1)
411        subl     $4,%esi
412        subl     $1,%ecx
413        jnz      2b
414        addl     %esi,%edi
415        jmp      4f
4163:      rep;     smovl
4174:      andl     $1,%eax              # suffix count
418        jz       5f                   # no suffix
419        # copy suffix
420        addl     $2,%esi
421        addl     $2,%edi
422        movw     (%esi),%dx
423        movw     %dx,(%edi)
4245:      cld
425        popl     %edi
426        popl     %esi
427        ret
428
429        # Support for void Copy::conjoint_jints_atomic(void* from,
430        #                                              void* to,
431        #                                              size_t count)
432        # Equivalent to
433        #   arrayof_conjoint_jints
434        .p2align 4,,15
435	.type    _Copy_conjoint_jints_atomic,@function
436	.type    _Copy_arrayof_conjoint_jints,@function
437_Copy_conjoint_jints_atomic:
438_Copy_arrayof_conjoint_jints:
439        pushl    %esi
440        movl     4+12(%esp),%ecx      # count
441        pushl    %edi
442        movl     8+ 4(%esp),%esi      # from
443        movl     8+ 8(%esp),%edi      # to
444        cmpl     %esi,%edi
445        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
446        jbe      ci_CopyRight
447        cmpl     %eax,%edi
448        jbe      ci_CopyLeft
449ci_CopyRight:
450        cmpl     $32,%ecx
451        jbe      2f                   # <= 32 dwords
452        rep;     smovl
453        popl     %edi
454        popl     %esi
455        ret
456        .=.+10
4572:      subl     %esi,%edi
458        jmp      4f
459        .p2align 4,,15
4603:      movl     (%esi),%edx
461        movl     %edx,(%edi,%esi,1)
462        addl     $4,%esi
4634:      subl     $1,%ecx
464        jge      3b
465        popl     %edi
466        popl     %esi
467        ret
468ci_CopyLeft:
469        std
470        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
471        cmpl     $32,%ecx
472        ja       4f                   # > 32 dwords
473        subl     %eax,%edi            # eax == from + count*4 - 4
474        jmp      3f
475        .p2align 4,,15
4762:      movl     (%eax),%edx
477        movl     %edx,(%edi,%eax,1)
478        subl     $4,%eax
4793:      subl     $1,%ecx
480        jge      2b
481        cld
482        popl     %edi
483        popl     %esi
484        ret
4854:      movl     %eax,%esi            # from + count*4 - 4
486        rep;     smovl
487        cld
488        popl     %edi
489        popl     %esi
490        ret
491
492        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
493        #                                               jlong* to,
494        #                                               size_t count)
495        #
496        # 32-bit
497        #
498        # count treated as signed
499        #
500        # if (from > to) {
501        #   while (--count >= 0) {
502        #     *to++ = *from++;
503        #   }
504        # } else {
505        #   while (--count >= 0) {
506        #     to[count] = from[count];
507        #   }
508        # }
509        .p2align 4,,15
510	.type    _Copy_conjoint_jlongs_atomic,@function
511_Copy_conjoint_jlongs_atomic:
512        movl     4+8(%esp),%ecx       # count
513        movl     4+0(%esp),%eax       # from
514        movl     4+4(%esp),%edx       # to
515        cmpl     %eax,%edx
516        jae      cla_CopyLeft
517cla_CopyRight:
518        subl     %eax,%edx
519        jmp      2f
520        .p2align 4,,15
5211:      fildll   (%eax)
522        fistpll  (%edx,%eax,1)
523        addl     $8,%eax
5242:      subl     $1,%ecx
525        jge      1b
526        ret
527        .p2align 4,,15
5283:      fildll   (%eax,%ecx,8)
529        fistpll  (%edx,%ecx,8)
530cla_CopyLeft:
531        subl     $1,%ecx
532        jge      3b
533        ret
534
535        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
536        #                                                 void* to,
537        #                                                 size_t count)
538        .p2align 4,,15
539	.type    _mmx_Copy_arrayof_conjoint_jshorts,@function
540_mmx_Copy_arrayof_conjoint_jshorts:
541        pushl    %esi
542        movl     4+12(%esp),%ecx
543        pushl    %edi
544        movl     8+ 4(%esp),%esi
545        movl     8+ 8(%esp),%edi
546        cmpl     %esi,%edi
547        leal     -2(%esi,%ecx,2),%eax
548        jbe      mmx_acs_CopyRight
549        cmpl     %eax,%edi
550        jbe      mmx_acs_CopyLeft
551mmx_acs_CopyRight:
552        movl     %ecx,%eax
553        sarl     %ecx
554        je       5f
555        cmpl     $33,%ecx
556        jae      3f
5571:      subl     %esi,%edi
558        .p2align 4,,15
5592:      movl     (%esi),%edx
560        movl     %edx,(%edi,%esi,1)
561        addl     $4,%esi
562        subl     $1,%ecx
563        jnz      2b
564        addl     %esi,%edi
565        jmp      5f
5663:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
567        subl     $1,%ecx
5684:      .p2align 4,,15
569        movq     0(%esi),%mm0
570        addl     $64,%edi
571        movq     8(%esi),%mm1
572        subl     $16,%ecx
573        movq     16(%esi),%mm2
574        movq     %mm0,-64(%edi)
575        movq     24(%esi),%mm0
576        movq     %mm1,-56(%edi)
577        movq     32(%esi),%mm1
578        movq     %mm2,-48(%edi)
579        movq     40(%esi),%mm2
580        movq     %mm0,-40(%edi)
581        movq     48(%esi),%mm0
582        movq     %mm1,-32(%edi)
583        movq     56(%esi),%mm1
584        movq     %mm2,-24(%edi)
585        movq     %mm0,-16(%edi)
586        addl     $64,%esi
587        movq     %mm1,-8(%edi)
588        cmpl     $16,%ecx
589        jge      4b
590        emms
591	testl    %ecx,%ecx
592	ja       1b
5935:      andl     $1,%eax
594        je       7f
5956:      movw     (%esi),%dx
596        movw     %dx,(%edi)
5977:	popl     %edi
598        popl     %esi
599        ret
600mmx_acs_CopyLeft:
601        std
602        leal     -4(%edi,%ecx,2),%edi
603        movl     %eax,%esi
604        movl     %ecx,%eax
605        subl     $2,%esi
606        sarl     %ecx
607        je       4f
608        cmpl     $32,%ecx
609        ja       3f
610        subl     %esi,%edi
611        .p2align 4,,15
6122:      movl     (%esi),%edx
613        movl     %edx,(%edi,%esi,1)
614        subl     $4,%esi
615        subl     $1,%ecx
616        jnz      2b
617        addl     %esi,%edi
618        jmp      4f
6193:      rep;     smovl
6204:      andl     $1,%eax
621        je       6f
622        addl     $2,%esi
623        addl     $2,%edi
6245:      movw     (%esi),%dx
625        movw     %dx,(%edi)
6266:      cld
627        popl     %edi
628        popl     %esi
629        ret
630
631
632        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
633        #                                   volatile jlong* dest,
634        #                                   jlong compare_value,
635        #                                   bool is_MP)
636        #
637        .p2align 4,,15
638	.type    _Atomic_cmpxchg_long,@function
639_Atomic_cmpxchg_long:
640                                   #  8(%esp) : return PC
641        pushl    %ebx              #  4(%esp) : old %ebx
642        pushl    %edi              #  0(%esp) : old %edi
643        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
644        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
645        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
646        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
647        movl     20(%esp), %edi    # 20(%esp) : dest
648        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
649        je       1f
650        lock
6511:      cmpxchg8b (%edi)
652        popl     %edi
653        popl     %ebx
654        ret
655
656