# linux_x86_64.s revision 1472:c18cbe5936b8
#
# Copyright (c) 2004, 2007, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


25        # NOTE WELL!  The _Copy functions are called directly
26	# from server-compiler-generated code via CallLeafNoFP,
27	# which means that they *must* either not use floating
28	# point or use it in the same manner as does the server
29	# compiler.
30
31        .globl _Copy_arrayof_conjoint_bytes
32	.globl _Copy_arrayof_conjoint_jshorts
33        .globl _Copy_conjoint_jshorts_atomic
34        .globl _Copy_arrayof_conjoint_jints
35        .globl _Copy_conjoint_jints_atomic
36        .globl _Copy_arrayof_conjoint_jlongs
37        .globl _Copy_conjoint_jlongs_atomic
38
39	.text
40
41        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
42        .align  16
43        .type   SafeFetch32,@function
44        // Prototype: int SafeFetch32 (int * Adr, int ErrValue)
45SafeFetch32:
46        movl    %esi, %eax
47Fetch32PFI:
48        movl    (%rdi), %eax
49Fetch32Resume:
50        ret
51
52        .globl SafeFetchN, FetchNPFI, FetchNResume
53        .align  16
54        .type   SafeFetchN,@function
55        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue)
56SafeFetchN:
57        movq    %rsi, %rax
58FetchNPFI:
59        movq    (%rdi), %rax
60FetchNResume:
61        ret
62
63        .globl SpinPause
64        .align 16
65        .type  SpinPause,@function
66SpinPause:
67        rep
68        nop
69        movq   $1, %rax
70        ret
71
72        # Support for void Copy::arrayof_conjoint_bytes(void* from,
73        #                                               void* to,
74        #                                               size_t count)
75        # rdi - from
76        # rsi - to
77        # rdx - count, treated as ssize_t
78        #
79        .p2align 4,,15
80	.type    _Copy_arrayof_conjoint_bytes,@function
81_Copy_arrayof_conjoint_bytes:
82        movq     %rdx,%r8             # byte count
83        shrq     $3,%rdx              # qword count
84        cmpq     %rdi,%rsi
85        leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
86        jbe      acb_CopyRight
87        cmpq     %rax,%rsi
88        jbe      acb_CopyLeft
89acb_CopyRight:
90        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
91        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
92        negq     %rdx
93        jmp      7f
94        .p2align 4,,15
951:      movq     8(%rax,%rdx,8),%rsi
96        movq     %rsi,8(%rcx,%rdx,8)
97        addq     $1,%rdx
98        jnz      1b
992:      testq    $4,%r8               # check for trailing dword
100        jz       3f
101        movl     8(%rax),%esi         # copy trailing dword
102        movl     %esi,8(%rcx)
103        addq     $4,%rax
104        addq     $4,%rcx              # original %rsi is trashed, so we
105                                      #  can't use it as a base register
1063:      testq    $2,%r8               # check for trailing word
107        jz       4f
108        movw     8(%rax),%si          # copy trailing word
109        movw     %si,8(%rcx)
110        addq     $2,%rcx
1114:      testq    $1,%r8               # check for trailing byte
112        jz       5f
113        movb     -1(%rdi,%r8,1),%al   # copy trailing byte
114        movb     %al,8(%rcx)
1155:      ret
116        .p2align 4,,15
1176:      movq     -24(%rax,%rdx,8),%rsi
118        movq     %rsi,-24(%rcx,%rdx,8)
119        movq     -16(%rax,%rdx,8),%rsi
120        movq     %rsi,-16(%rcx,%rdx,8)
121        movq     -8(%rax,%rdx,8),%rsi
122        movq     %rsi,-8(%rcx,%rdx,8)
123        movq     (%rax,%rdx,8),%rsi
124        movq     %rsi,(%rcx,%rdx,8)
1257:      addq     $4,%rdx
126        jle      6b
127        subq     $4,%rdx
128        jl       1b
129        jmp      2b
130acb_CopyLeft:
131        testq    $1,%r8               # check for trailing byte
132        jz       1f
133        movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
134        movb     %cl,-1(%rsi,%r8,1)
135        subq     $1,%r8               # adjust for possible trailing word
1361:      testq    $2,%r8               # check for trailing word
137        jz       2f
138        movw     -2(%rdi,%r8,1),%cx   # copy trailing word
139        movw     %cx,-2(%rsi,%r8,1)
1402:      testq    $4,%r8               # check for trailing dword
141        jz       5f
142        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
143        movl     %ecx,(%rsi,%rdx,8)
144        jmp      5f
145        .p2align 4,,15
1463:      movq     -8(%rdi,%rdx,8),%rcx
147        movq     %rcx,-8(%rsi,%rdx,8)
148        subq     $1,%rdx
149        jnz      3b
150        ret
151        .p2align 4,,15
1524:      movq     24(%rdi,%rdx,8),%rcx
153        movq     %rcx,24(%rsi,%rdx,8)
154        movq     16(%rdi,%rdx,8),%rcx
155        movq     %rcx,16(%rsi,%rdx,8)
156        movq     8(%rdi,%rdx,8),%rcx
157        movq     %rcx,8(%rsi,%rdx,8)
158        movq     (%rdi,%rdx,8),%rcx
159        movq     %rcx,(%rsi,%rdx,8)
1605:      subq     $4,%rdx
161        jge      4b
162        addq     $4,%rdx
163        jg       3b
164        ret
165
166        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
167        #                                                 void* to,
168        #                                                 size_t count)
169        # Equivalent to
170        #   conjoint_jshorts_atomic
171        #
172        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
173        # let the hardware handle it.  The tow or four words within dwords
174        # or qwords that span cache line boundaries will still be loaded
175        # and stored atomically.
176        #
177        # rdi - from
178        # rsi - to
179        # rdx - count, treated as ssize_t
180        #
181        .p2align 4,,15
182	.type    _Copy_arrayof_conjoint_jshorts,@function
183	.type    _Copy_conjoint_jshorts_atomic,@function
184_Copy_arrayof_conjoint_jshorts:
185_Copy_conjoint_jshorts_atomic:
186        movq     %rdx,%r8             # word count
187        shrq     $2,%rdx              # qword count
188        cmpq     %rdi,%rsi
189        leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
190        jbe      acs_CopyRight
191        cmpq     %rax,%rsi
192        jbe      acs_CopyLeft
193acs_CopyRight:
194        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
195        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
196        negq     %rdx
197        jmp      6f
1981:      movq     8(%rax,%rdx,8),%rsi
199        movq     %rsi,8(%rcx,%rdx,8)
200        addq     $1,%rdx
201        jnz      1b
2022:      testq    $2,%r8               # check for trailing dword
203        jz       3f
204        movl     8(%rax),%esi         # copy trailing dword
205        movl     %esi,8(%rcx)
206        addq     $4,%rcx              # original %rsi is trashed, so we
207                                      #  can't use it as a base register
2083:      testq    $1,%r8               # check for trailing word
209        jz       4f
210        movw     -2(%rdi,%r8,2),%si   # copy trailing word
211        movw     %si,8(%rcx)
2124:      ret
213        .p2align 4,,15
2145:      movq     -24(%rax,%rdx,8),%rsi
215        movq     %rsi,-24(%rcx,%rdx,8)
216        movq     -16(%rax,%rdx,8),%rsi
217        movq     %rsi,-16(%rcx,%rdx,8)
218        movq     -8(%rax,%rdx,8),%rsi
219        movq     %rsi,-8(%rcx,%rdx,8)
220        movq     (%rax,%rdx,8),%rsi
221        movq     %rsi,(%rcx,%rdx,8)
2226:      addq     $4,%rdx
223        jle      5b
224        subq     $4,%rdx
225        jl       1b
226        jmp      2b
227acs_CopyLeft:
228        testq    $1,%r8               # check for trailing word
229        jz       1f
230        movw     -2(%rdi,%r8,2),%cx   # copy trailing word
231        movw     %cx,-2(%rsi,%r8,2)
2321:      testq    $2,%r8               # check for trailing dword
233        jz       4f
234        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
235        movl     %ecx,(%rsi,%rdx,8)
236        jmp      4f
2372:      movq     -8(%rdi,%rdx,8),%rcx
238        movq     %rcx,-8(%rsi,%rdx,8)
239        subq     $1,%rdx
240        jnz      2b
241        ret
242        .p2align 4,,15
2433:      movq     24(%rdi,%rdx,8),%rcx
244        movq     %rcx,24(%rsi,%rdx,8)
245        movq     16(%rdi,%rdx,8),%rcx
246        movq     %rcx,16(%rsi,%rdx,8)
247        movq     8(%rdi,%rdx,8),%rcx
248        movq     %rcx,8(%rsi,%rdx,8)
249        movq     (%rdi,%rdx,8),%rcx
250        movq     %rcx,(%rsi,%rdx,8)
2514:      subq     $4,%rdx
252        jge      3b
253        addq     $4,%rdx
254        jg       2b
255        ret
256
257        # Support for void Copy::arrayof_conjoint_jints(jint* from,
258        #                                               jint* to,
259        #                                               size_t count)
260        # Equivalent to
261        #   conjoint_jints_atomic
262        #
263        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
264        # the hardware handle it.  The two dwords within qwords that span
265        # cache line boundaries will still be loaded and stored atomically.
266        #
267        # rdi - from
268        # rsi - to
269        # rdx - count, treated as ssize_t
270        #
271        .p2align 4,,15
272	.type    _Copy_arrayof_conjoint_jints,@function
273	.type    _Copy_conjoint_jints_atomic,@function
274_Copy_arrayof_conjoint_jints:
275_Copy_conjoint_jints_atomic:
276        movq     %rdx,%r8             # dword count
277        shrq     %rdx                 # qword count
278        cmpq     %rdi,%rsi
279        leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
280        jbe      aci_CopyRight
281        cmpq     %rax,%rsi
282        jbe      aci_CopyLeft
283aci_CopyRight:
284        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
285        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
286        negq     %rdx
287        jmp      5f
288        .p2align 4,,15
2891:      movq     8(%rax,%rdx,8),%rsi
290        movq     %rsi,8(%rcx,%rdx,8)
291        addq     $1,%rdx
292        jnz       1b
2932:      testq    $1,%r8               # check for trailing dword
294        jz       3f
295        movl     8(%rax),%esi         # copy trailing dword
296        movl     %esi,8(%rcx)
2973:      ret
298        .p2align 4,,15
2994:      movq     -24(%rax,%rdx,8),%rsi
300        movq     %rsi,-24(%rcx,%rdx,8)
301        movq     -16(%rax,%rdx,8),%rsi
302        movq     %rsi,-16(%rcx,%rdx,8)
303        movq     -8(%rax,%rdx,8),%rsi
304        movq     %rsi,-8(%rcx,%rdx,8)
305        movq     (%rax,%rdx,8),%rsi
306        movq     %rsi,(%rcx,%rdx,8)
3075:      addq     $4,%rdx
308        jle      4b
309        subq     $4,%rdx
310        jl       1b
311        jmp      2b
312aci_CopyLeft:
313        testq    $1,%r8               # check for trailing dword
314        jz       3f
315        movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
316        movl     %ecx,-4(%rsi,%r8,4)
317        jmp      3f
3181:      movq     -8(%rdi,%rdx,8),%rcx
319        movq     %rcx,-8(%rsi,%rdx,8)
320        subq     $1,%rdx
321        jnz      1b
322        ret
323        .p2align 4,,15
3242:      movq     24(%rdi,%rdx,8),%rcx
325        movq     %rcx,24(%rsi,%rdx,8)
326        movq     16(%rdi,%rdx,8),%rcx
327        movq     %rcx,16(%rsi,%rdx,8)
328        movq     8(%rdi,%rdx,8),%rcx
329        movq     %rcx,8(%rsi,%rdx,8)
330        movq     (%rdi,%rdx,8),%rcx
331        movq     %rcx,(%rsi,%rdx,8)
3323:      subq     $4,%rdx
333        jge      2b
334        addq     $4,%rdx
335        jg       1b
336        ret
337
338        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
339        #                                                jlong* to,
340        #                                                size_t count)
341        # Equivalent to
342        #   conjoint_jlongs_atomic
343        #   arrayof_conjoint_oops
344        #   conjoint_oops_atomic
345        #
346        # rdi - from
347        # rsi - to
348        # rdx - count, treated as ssize_t
349        #
350        .p2align 4,,15
351	.type    _Copy_arrayof_conjoint_jlongs,@function
352	.type    _Copy_conjoint_jlongs_atomic,@function
353_Copy_arrayof_conjoint_jlongs:
354_Copy_conjoint_jlongs_atomic:
355        cmpq     %rdi,%rsi
356        leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
357        jbe      acl_CopyRight
358        cmpq     %rax,%rsi
359        jbe      acl_CopyLeft
360acl_CopyRight:
361        leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
362        negq     %rdx
363        jmp      3f
3641:      movq     8(%rax,%rdx,8),%rsi
365        movq     %rsi,8(%rcx,%rdx,8)
366        addq     $1,%rdx
367        jnz      1b
368        ret
369        .p2align 4,,15
3702:      movq     -24(%rax,%rdx,8),%rsi
371        movq     %rsi,-24(%rcx,%rdx,8)
372        movq     -16(%rax,%rdx,8),%rsi
373        movq     %rsi,-16(%rcx,%rdx,8)
374        movq     -8(%rax,%rdx,8),%rsi
375        movq     %rsi,-8(%rcx,%rdx,8)
376        movq     (%rax,%rdx,8),%rsi
377        movq     %rsi,(%rcx,%rdx,8)
3783:      addq     $4,%rdx
379        jle      2b
380        subq     $4,%rdx
381        jl       1b
382        ret
3834:      movq     -8(%rdi,%rdx,8),%rcx
384        movq     %rcx,-8(%rsi,%rdx,8)
385        subq     $1,%rdx
386        jnz      4b
387        ret
388        .p2align 4,,15
3895:      movq     24(%rdi,%rdx,8),%rcx
390        movq     %rcx,24(%rsi,%rdx,8)
391        movq     16(%rdi,%rdx,8),%rcx
392        movq     %rcx,16(%rsi,%rdx,8)
393        movq     8(%rdi,%rdx,8),%rcx
394        movq     %rcx,8(%rsi,%rdx,8)
395        movq     (%rdi,%rdx,8),%rcx
396        movq     %rcx,(%rsi,%rdx,8)
397acl_CopyLeft:
398        subq     $4,%rdx
399        jge      5b
400        addq     $4,%rdx
401        jg       4b
402        ret
403