# linux_x86_32.s revision 0:a61af66fc99e
#
# Copyright 2004-2007 Sun Microsystems, Inc.  All Rights Reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
# CA 95054 USA or visit www.sun.com if you need additional information or
# have any questions.
#
23
24
# NOTE WELL!  The _Copy functions are called directly from
# server-compiler-generated code via CallLeafNoFP, which means that
# they *must* either not use floating point at all or use it in the
# same manner as the server compiler does.

        # Copy entry points
        .globl   _Copy_conjoint_bytes, _Copy_arrayof_conjoint_bytes
        .globl   _Copy_conjoint_jshorts_atomic, _Copy_arrayof_conjoint_jshorts
        .globl   _Copy_conjoint_jints_atomic, _Copy_arrayof_conjoint_jints
        .globl   _Copy_conjoint_jlongs_atomic
        .globl   _mmx_Copy_arrayof_conjoint_jshorts

        # Atomic entry points
        .globl   _Atomic_cmpxchg_long

        .text
43
# SafeFetch32 / SafeFetchN (two names for the same entry point)
#
# Reads the 32-bit word whose address is the first stack argument and
# returns it in %eax.  Before the load, %eax is preset to the second
# stack argument.  The load itself is labelled Fetch32PFI and the
# instruction after it Fetch32Resume; both labels are exported.
# NOTE(review): presumably a fault handler outside this file compares the
# faulting EIP with Fetch32PFI and resumes at Fetch32Resume, so that the
# preset value is what gets returned -- confirm against that handler.
#
# TODO: avoid exposing Fetch32PFI and Fetch32Resume.
# Instead, the signal handler would call a new SafeFetchTriage(FaultingEIP)
# routine to vet the address.  If the address is the faulting LD then
# SafeFetchTriage() would return the resume-at EIP, otherwise null.
        .globl   SafeFetch32
        .globl   SafeFetchN
        .globl   Fetch32PFI
        .globl   Fetch32Resume
        .type    SafeFetch32,@function
        .p2align 4,,15
SafeFetch32:
SafeFetchN:
        movl     4(%esp),%ecx               # address to read
        movl     8(%esp),%eax               # value left in %eax if the load below never completes
Fetch32PFI:
        movl     (%ecx),%eax                # the one instruction expected to be able to fault
Fetch32Resume:
        ret
60
61
# SpinPause
#
# Executes a single spin-wait hint and returns 1 in %eax.
# Takes no arguments and touches no register other than %eax.
        .globl   SpinPause
        .type    SpinPause,@function
        .p2align 4,,15
SpinPause:
        pause                               # assembles to F3 90, i.e. "rep; nop"
        movl     $1,%eax                    # return value
        ret
70
# Support for void Copy::conjoint_bytes(void* from,
#                                       void* to,
#                                       size_t count)
#
# Copies count bytes from `from` to `to`; the ranges may overlap.
# The three arguments are read from the stack.  %esi and %edi are saved
# and restored; %eax, %ecx, %edx and the flags are overwritten.
#
# Direction (all comparisons unsigned):
#   to <= from, or to > from + count - 1  ->  cb_CopyRight, ascending
#   from < to <= from + count - 1         ->  cb_CopyLeft, descending;
#                                             runs with DF set and clears
#                                             it again before returning
#
# Ascending copies first move up to 3 bytes so that the source becomes
# dword aligned, then whole dwords (rep movs above 32 dwords, an open
# coded loop otherwise), then the 0..3 remaining bytes.
#
# A count of zero copies nothing: the "<= 3 bytes" shortcut enters the
# tail code at the `andl $3` test, so the byte loops only ever start
# with a non-zero remainder.
        .p2align 4,,15
        .type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1 (leal keeps the flags of the cmpl)
        jbe      cb_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cb_CopyLeft          # destination starts inside the source range
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes (including zero): tail code only
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count = (4 - from) & 3
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        subl     %esi,%edi            # edi = to - from, so (%edi,%esi) addresses the destination
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # edi is an absolute address again
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords with the string instruction
        rep;     smovl
        jmp      4f
        # copy aligned dwords with an open coded loop
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi is an absolute address again
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix (also the zero-count exit)
        # copy suffix
        xorl     %eax,%eax            # eax = byte index
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the tail
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes: tail code only
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is an absolute address again
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi            # edi = to - from
        addl     $3,%esi              # esi = highest byte not copied yet
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
176
# Support for void Copy::arrayof_conjoint_bytes(void* from,
#                                               void* to,
#                                               size_t count)
#
# Same as _Copy_conjoint_bytes, except no source alignment check:
# the dword phase starts directly at `from`.
#
# Copies count bytes; the ranges may overlap.  The three arguments are
# read from the stack.  %esi and %edi are saved and restored; %eax, %ecx,
# %edx and the flags are overwritten.
#
# Direction (all comparisons unsigned):
#   to <= from, or to > from + count - 1  ->  acb_CopyRight, ascending
#   from < to <= from + count - 1         ->  acb_CopyLeft, descending;
#                                             runs with DF set and clears
#                                             it again before returning
#
# A count of zero copies nothing: the "<= 3 bytes" shortcut enters the
# tail code at the `andl $3` test, so the byte loops only ever start
# with a non-zero remainder.
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1 (leal keeps the flags of the cmpl)
        jbe      acb_CopyRight        # to <= from
        cmpl     %eax,%edi
        jbe      acb_CopyLeft         # destination starts inside the source range
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes (including zero): tail code only
1:      movl     %ecx,%eax            # keep the byte count for the tail
        shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords with an open coded loop
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is an absolute address again
        jmp      4f
        # copy dwords with the string instruction
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix (also the zero-count exit)
        # copy suffix
        xorl     %eax,%eax            # eax = byte index
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax            # keep the byte count for the tail
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes: tail code only
1:      shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        .space   8                    # eight bytes of padding, never executed
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi is an absolute address again
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi            # edi = to - from
        addl     $3,%esi              # esi = highest byte not copied yet
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
266
# Support for void Copy::conjoint_jshorts_atomic(void* from,
#                                                void* to,
#                                                size_t count)
#
# Copies count 16-bit elements from `from` to `to`; the ranges may
# overlap.  The three arguments are read from the stack.  %esi and %edi
# are saved and restored; %eax, %ecx, %edx and the flags are overwritten.
# The element count is treated as signed (sarl / jl).
#
# Direction (unsigned comparisons):
#   to <= from, or to > from + count*2 - 2  ->  cs_CopyRight, ascending
#   from < to <= from + count*2 - 2         ->  cs_CopyLeft, descending;
#                                               runs with DF set and clears
#                                               it again before returning
#
# The ascending path moves one leading element when the source is not
# dword aligned, then whole dwords, then at most one trailing element.
# The prefix code assumes `from` is at least 2-byte aligned, so that
# from & 3 is either 0 or 2.
#
# The count is decremented and checked before the prefix element is
# copied, so a zero count with a source at 2 mod 4 copies nothing.
        .p2align 4,,15
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2 (leal keeps the flags of the cmpl)
        jbe      cs_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      cs_CopyLeft          # destination starts inside the source range
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx              # the prefix consumes one element
        jl       5f                   # count was zero: nothing to copy
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords with the string instruction
        rep;     smovl
        jmp      4f
        # copy aligned dwords with an open coded loop
2:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi            # edi is an absolute address again
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # keep the element count for the tail
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is an absolute address again
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi              # the remaining element sits in the upper
        addl     $2,%edi              # half of the next lower dword
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
352
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
#                                                 void* to,
#                                                 size_t count)
#
# Copies count 16-bit elements; the ranges may overlap.  Whole dwords are
# moved first (rep movs above 32 dwords, an open coded loop otherwise),
# followed by one 16-bit move when count is odd.  There is no source
# alignment prefix.  The three arguments are read from the stack; %esi and
# %edi are saved and restored, %eax, %ecx, %edx and the flags are not.
#
# Direction (unsigned comparisons):
#   to <= from, or to > from + count*2 - 2  ->  acs_CopyRight, ascending
#   from < to <= from + count*2 - 2         ->  acs_CopyLeft, descending;
#                                               runs with DF set and clears
#                                               it again before returning
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        pushl    %edi
        movl     12(%esp),%esi              # from
        movl     16(%esp),%edi              # to
        movl     20(%esp),%ecx              # count
        leal     -2(%esi,%ecx,2),%eax       # address of the last source element
        cmpl     %esi,%edi
        jbe      acs_CopyRight              # to <= from
        cmpl     %eax,%edi
        jbe      acs_CopyLeft               # destination starts inside the source range
acs_CopyRight:
        movl     %ecx,%eax                  # element count; bit 0 is tested in the tail
        sarl     $1,%ecx                    # dword count
        jz       .Lacs_fwd_tail             # fewer than two elements
        cmpl     $32,%ecx
        jbe      .Lacs_fwd_setup            # <= 32 dwords: open coded loop
        rep movsl
        jmp      .Lacs_fwd_tail
        .space   5                          # five bytes of padding, never executed
.Lacs_fwd_setup:
        subl     %esi,%edi                  # edi = to - from
        .p2align 4,,15
.Lacs_fwd_loop:
        movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      .Lacs_fwd_loop
        addl     %esi,%edi                  # edi is an absolute address again
.Lacs_fwd_tail:
        andl     $1,%eax                    # odd element left over?
        jz       .Lacs_fwd_done
        movw     (%esi),%dx
        movw     %dx,(%edi)
.Lacs_fwd_done:
        popl     %edi
        popl     %esi
        ret
acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi       # to + count*2 - 4
        leal     -2(%eax),%esi              # from + count*2 - 4
        movl     %ecx,%eax                  # element count; bit 0 is tested in the tail
        sarl     $1,%ecx                    # dword count
        jz       .Lacs_bwd_tail             # no dwords to move
        cmpl     $32,%ecx
        ja       .Lacs_bwd_rep              # > 32 dwords
        subl     %esi,%edi                  # edi = to - from
        .p2align 4,,15
.Lacs_bwd_loop:
        movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      .Lacs_bwd_loop
        addl     %esi,%edi                  # edi is an absolute address again
        jmp      .Lacs_bwd_tail
.Lacs_bwd_rep:
        rep movsl
.Lacs_bwd_tail:
        andl     $1,%eax                    # odd element left over?
        jz       .Lacs_bwd_done
        movw     2(%esi),%dx                # it sits in the upper half of the
        movw     %dx,2(%edi)                # next lower dword
.Lacs_bwd_done:
        cld                                 # restore the direction flag
        popl     %edi
        popl     %esi
        ret
427
# Support for void Copy::conjoint_jints_atomic(void* from,
#                                              void* to,
#                                              size_t count)
# Equivalent to
#   arrayof_conjoint_jints
#
# Copies count 32-bit elements from `from` to `to`; the ranges may
# overlap.  The three arguments are read from the stack.  %esi and %edi
# are saved and restored; %eax, %ecx, %edx and the flags are overwritten.
#
# Direction (unsigned comparisons):
#   to <= from, or to > from + count*4 - 4  ->  ci_CopyRight, ascending
#   from < to <= from + count*4 - 4         ->  ci_CopyLeft, descending;
#                                               runs with DF set and clears
#                                               it again before returning
#
# More than 32 elements go through rep movs; shorter copies use an open
# coded loop whose count is tested before each element (subl / jge), so
# a count of zero copies nothing.
        .p2align 4,,15
        .type    _Copy_conjoint_jints_atomic,@function
        .type    _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4 (leal keeps the flags of the cmpl)
        jbe      ci_CopyRight         # to <= from
        cmpl     %eax,%edi
        jbe      ci_CopyLeft          # destination starts inside the source range
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        .space   10                   # ten bytes of padding, never executed
2:      subl     %esi,%edi            # edi = to - from
        jmp      4f                   # test the count before the first element
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
4:      subl     $1,%ecx
        jge      3b                   # elements left
        popl     %edi
        popl     %esi
        ret
ci_CopyLeft:
        std
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f                   # test the count before the first element
        .p2align 4,,15
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b                   # elements left
        cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
488
# Support for void Copy::conjoint_jlongs_atomic(jlong* from,
#                                               jlong* to,
#                                               size_t count)
#
# Variant for 32-bit code: every 64-bit element is moved by one fildll
# (8-byte load onto the x87 stack) followed by one fistpll (8-byte store
# and pop).  The count is treated as signed.  In C terms:
#
#   if (from > to) {
#     while (--count >= 0) {
#       *to++ = *from++;
#     }
#   } else {
#     while (--count >= 0) {
#       to[count] = from[count];
#     }
#   }
#
# Only %eax, %ecx, %edx, the flags and the x87 stack are used; nothing
# is pushed.
        .p2align 4,,15
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl     4(%esp),%eax               # from
        movl     8(%esp),%edx               # to
        movl     12(%esp),%ecx              # count
        cmpl     %eax,%edx
        jae      cla_CopyLeft               # to >= from: walk the index downwards
cla_CopyRight:
        subl     %eax,%edx                  # edx = to - from
        jmp      .Lcla_up_test              # test the count before the first element
        .p2align 4,,15
.Lcla_up_body:
        fildll   (%eax)
        fistpll  (%edx,%eax,1)              # (%edx,%eax) addresses the destination element
        addl     $8,%eax
.Lcla_up_test:
        subl     $1,%ecx
        jge      .Lcla_up_body
        ret
        .p2align 4,,15
.Lcla_down_body:
        fildll   (%eax,%ecx,8)              # from[count]
        fistpll  (%edx,%ecx,8)              # to[count]
cla_CopyLeft:
        subl     $1,%ecx
        jge      .Lcla_down_body
        ret
531
# MMX variant of the support for
#   void Copy::arrayof_conjoint_jshorts(void* from,
#                                       void* to,
#                                       size_t count)
#
# Copies count 16-bit elements; the ranges may overlap.  The three
# arguments are read from the stack.  %esi and %edi are saved and
# restored; %eax, %ecx, %edx, the flags and %mm0-%mm2 are overwritten.
#
# Direction (unsigned comparisons):
#   to <= from, or to > from + count*2 - 2  ->  mmx_acs_CopyRight, ascending
#   from < to <= from + count*2 - 2         ->  mmx_acs_CopyLeft, descending;
#                                               runs with DF set and clears
#                                               it again before returning
#
# Ascending copies of 33 or more dwords move one dword with movs and then
# 64 bytes per iteration through %mm0-%mm2; emms is executed before the
# MMX path continues with ordinary code.  Whatever is left (fewer than 16
# dwords) goes through the open coded dword loop, and an odd trailing
# element is moved with a 16-bit move.  Descending copies do not use MMX.
        .p2align 4,,15
        .type    _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2 (leal keeps the flags of the cmpl)
        jbe      mmx_acs_CopyRight    # to <= from
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft     # destination starts inside the source range
mmx_acs_CopyRight:
        movl     %ecx,%eax            # element count; bit 0 is tested in the tail
        sarl     %ecx                 # dword count
        je       5f                   # no dwords to move
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: MMX path
        # copy dwords with an open coded loop
1:      subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is an absolute address again
        jmp      5f
3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx              # account for the dword just moved
        # 64 bytes (16 dwords) per iteration.  The loop label follows the
        # alignment directive so that the back edge lands on the aligned
        # first instruction instead of on the padding.
        .p2align 4,,15
4:      movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b                   # another full 16-dword block left
        emms                          # leave MMX state before ordinary code continues
        testl    %ecx,%ecx
        jnz      1b                   # 1..15 dwords left: finish in the dword loop
5:      andl     $1,%eax              # odd element left over?
        je       7f
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:      popl     %edi
        popl     %esi
        ret
mmx_acs_CopyLeft:
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax            # element count; bit 0 is tested in the tail
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi            # edi = to - from
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi            # edi is an absolute address again
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # odd element left over?
        je       6f
        addl     $2,%esi              # it sits in the upper half of the
        addl     $2,%edi              # next lower dword
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
6:      cld                           # restore the direction flag
        popl     %edi
        popl     %esi
        ret
627
628
# Support for jlong Atomic::cmpxchg(jlong exchange_value,
#                                   volatile jlong* dest,
#                                   jlong compare_value,
#                                   bool is_MP)
#
# Loads compare_value into %edx:%eax and exchange_value into %ecx:%ebx and
# executes cmpxchg8b on the 8 bytes at dest.  The instruction leaves the
# value it found at dest in %edx:%eax, and that pair is the return value;
# the store of exchange_value only happens when the comparison matched.
# The lock prefix byte is executed only when is_MP is non-zero.
#
# Stack layout after the two pushes below:
#   saved %edi at 0(%esp), saved %ebx at 4(%esp), return PC at 8(%esp),
#   exchange_value at 12/16(%esp), dest at 20(%esp),
#   compare_value at 24/28(%esp), is_MP at 32(%esp).
        .p2align 4,,15
        .type    _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
        pushl    %ebx
        pushl    %edi
        movl     20(%esp),%edi              # dest
        movl     24(%esp),%eax              # compare_value (low)
        movl     28(%esp),%edx              # compare_value (high)
        movl     12(%esp),%ebx              # exchange_value (low)
        movl     16(%esp),%ecx              # exchange_value (high)
        cmpl     $0,32(%esp)                # is_MP
        je       .Lcmpxchg_long_exec        # zero: jump past the prefix byte
        lock
.Lcmpxchg_long_exec:
        cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret
652
653