/ solaris_x86_64.s  (revision 0:a61af66fc99e)
/
/ Copyright 2004-2005 Sun Microsystems, Inc.  All Rights Reserved.
/ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/
/ This code is free software; you can redistribute it and/or modify it
/ under the terms of the GNU General Public License version 2 only, as
/ published by the Free Software Foundation.
/
/ This code is distributed in the hope that it will be useful, but WITHOUT
/ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
/ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
/ version 2 for more details (a copy is included in the LICENSE file that
/ accompanied this code).
/
/ You should have received a copy of the GNU General Public License version
/ 2 along with this work; if not, write to the Free Software Foundation,
/ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
/
/ Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
/ CA 95054 USA or visit www.sun.com if you need additional information or
/ have any questions.
/
23
24	.globl fs_load
25	.globl fs_thread
26
27        // NOTE WELL!  The _Copy functions are called directly
28	// from server-compiler-generated code via CallLeafNoFP,
29	// which means that they *must* either not use floating
30	// point or use it in the same manner as does the server
31	// compiler.
32
33        .globl _Copy_arrayof_conjoint_bytes
34        .globl _Copy_conjoint_jshorts_atomic
35	.globl _Copy_arrayof_conjoint_jshorts
36        .globl _Copy_conjoint_jints_atomic
37        .globl _Copy_arrayof_conjoint_jints
38	.globl _Copy_conjoint_jlongs_atomic
39        .globl _Copy_arrayof_conjoint_jlongs
40
41	.section .text,"ax"
42
43        / Fast thread accessors, used by threadLS_solaris_amd64.cpp
44	.align   16
45fs_load:
46	movq %fs:(%rdi),%rax
47	ret
48
49	.align   16
50fs_thread:
51	movq %fs:0x0,%rax
52	ret
53
54        .globl SafeFetch32, Fetch32PFI, Fetch32Resume
55        .align  16
56        // Prototype: int SafeFetch32 (int * Adr, int ErrValue)
57SafeFetch32:
58        movl    %esi, %eax
59Fetch32PFI:
60        movl    (%rdi), %eax
61Fetch32Resume:
62        ret
63
64        .globl SafeFetchN, FetchNPFI, FetchNResume
65        .align  16
66        // Prototype: intptr_t SafeFetchN (intptr_t * Adr, intptr_t ErrValue)
67SafeFetchN:
68        movq    %rsi, %rax
69FetchNPFI:
70        movq    (%rdi), %rax
71FetchNResume:
72        ret
73
74        .globl  SpinPause
75        .align  16
76SpinPause:
77        rep
78        nop
79        movq    $1, %rax
80        ret
81
82
83        / Support for void Copy::arrayof_conjoint_bytes(void* from,
84        /                                               void* to,
85        /                                               size_t count)
86        / rdi - from
87        / rsi - to
88        / rdx - count, treated as ssize_t
89        /
90        .align   16
91_Copy_arrayof_conjoint_bytes:
92        movq     %rdx,%r8             / byte count
93        shrq     $3,%rdx              / qword count
94        cmpq     %rdi,%rsi
95        leaq     -1(%rdi,%r8,1),%rax  / from + bcount*1 - 1
96        jbe      acb_CopyRight
97        cmpq     %rax,%rsi
98        jbe      acb_CopyLeft
99acb_CopyRight:
100        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
101        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
102        negq     %rdx
103        jmp      7f
104        .align   16
1051:      movq     8(%rax,%rdx,8),%rsi
106        movq     %rsi,8(%rcx,%rdx,8)
107        addq     $1,%rdx
108        jnz      1b
1092:      testq    $4,%r8               / check for trailing dword
110        jz       3f
111        movl     8(%rax),%esi         / copy trailing dword
112        movl     %esi,8(%rcx)
113        addq     $4,%rax
114        addq     $4,%rcx              / original %rsi is trashed, so we
115                                      /  can't use it as a base register
1163:      testq    $2,%r8               / check for trailing word
117        jz       4f
118        movw     8(%rax),%si          / copy trailing word
119        movw     %si,8(%rcx)
120        addq     $2,%rcx
1214:      testq    $1,%r8               / check for trailing byte
122        jz       5f
123        movb     -1(%rdi,%r8,1),%al   / copy trailing byte
124        movb     %al,8(%rcx)
1255:      ret
126        .align   16
1276:      movq     -24(%rax,%rdx,8),%rsi
128        movq     %rsi,-24(%rcx,%rdx,8)
129        movq     -16(%rax,%rdx,8),%rsi
130        movq     %rsi,-16(%rcx,%rdx,8)
131        movq     -8(%rax,%rdx,8),%rsi
132        movq     %rsi,-8(%rcx,%rdx,8)
133        movq     (%rax,%rdx,8),%rsi
134        movq     %rsi,(%rcx,%rdx,8)
1357:      addq     $4,%rdx
136        jle      6b
137        subq     $4,%rdx
138        jl       1b
139        jmp      2b
140acb_CopyLeft:
141        testq    $1,%r8               / check for trailing byte
142        jz       1f
143        movb     -1(%rdi,%r8,1),%cl   / copy trailing byte
144        movb     %cl,-1(%rsi,%r8,1)
145        subq     $1,%r8               / adjust for possible trailing word
1461:      testq    $2,%r8               / check for trailing word
147        jz       2f
148        movw     -2(%rdi,%r8,1),%cx   / copy trailing word
149        movw     %cx,-2(%rsi,%r8,1)
1502:      testq    $4,%r8               / check for trailing dword
151        jz       5f
152        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
153        movl     %ecx,(%rsi,%rdx,8)
154        jmp      5f
155        .align   16
1563:      movq     -8(%rdi,%rdx,8),%rcx
157        movq     %rcx,-8(%rsi,%rdx,8)
158        subq     $1,%rdx
159        jnz      3b
160        ret
161        .align   16
1624:      movq     24(%rdi,%rdx,8),%rcx
163        movq     %rcx,24(%rsi,%rdx,8)
164        movq     16(%rdi,%rdx,8),%rcx
165        movq     %rcx,16(%rsi,%rdx,8)
166        movq     8(%rdi,%rdx,8),%rcx
167        movq     %rcx,8(%rsi,%rdx,8)
168        movq     (%rdi,%rdx,8),%rcx
169        movq     %rcx,(%rsi,%rdx,8)
1705:      subq     $4,%rdx
171        jge      4b
172        addq     $4,%rdx
173        jg       3b
174        ret
175
176        / Support for void Copy::arrayof_conjoint_jshorts(void* from,
177        /                                                 void* to,
178        /                                                 size_t count)
179        / Equivalent to
180        /   conjoint_jshorts_atomic
181        /
182        / If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
183        / let the hardware handle it.  The tow or four words within dwords
184        / or qwords that span cache line boundaries will still be loaded
185        / and stored atomically.
186        /
187        / rdi - from
188        / rsi - to
189        / rdx - count, treated as ssize_t
190        /
191        .align   16
192_Copy_arrayof_conjoint_jshorts:
193_Copy_conjoint_jshorts_atomic:
194        movq     %rdx,%r8             / word count
195        shrq     $2,%rdx              / qword count
196        cmpq     %rdi,%rsi
197        leaq     -2(%rdi,%r8,2),%rax  / from + wcount*2 - 2
198        jbe      acs_CopyRight
199        cmpq     %rax,%rsi
200        jbe      acs_CopyLeft
201acs_CopyRight:
202        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
203        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
204        negq     %rdx
205        jmp      6f
2061:      movq     8(%rax,%rdx,8),%rsi
207        movq     %rsi,8(%rcx,%rdx,8)
208        addq     $1,%rdx
209        jnz      1b
2102:      testq    $2,%r8               / check for trailing dword
211        jz       3f
212        movl     8(%rax),%esi         / copy trailing dword
213        movl     %esi,8(%rcx)
214        addq     $4,%rcx              / original %rsi is trashed, so we
215                                      /  can't use it as a base register
2163:      testq    $1,%r8               / check for trailing word
217        jz       4f
218        movw     -2(%rdi,%r8,2),%si   / copy trailing word
219        movw     %si,8(%rcx)
2204:      ret
221        .align   16
2225:      movq     -24(%rax,%rdx,8),%rsi
223        movq     %rsi,-24(%rcx,%rdx,8)
224        movq     -16(%rax,%rdx,8),%rsi
225        movq     %rsi,-16(%rcx,%rdx,8)
226        movq     -8(%rax,%rdx,8),%rsi
227        movq     %rsi,-8(%rcx,%rdx,8)
228        movq     (%rax,%rdx,8),%rsi
229        movq     %rsi,(%rcx,%rdx,8)
2306:      addq     $4,%rdx
231        jle      5b
232        subq     $4,%rdx
233        jl       1b
234        jmp      2b
235acs_CopyLeft:
236        testq    $1,%r8               / check for trailing word
237        jz       1f
238        movw     -2(%rdi,%r8,2),%cx   / copy trailing word
239        movw     %cx,-2(%rsi,%r8,2)
2401:      testq    $2,%r8               / check for trailing dword
241        jz       4f
242        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
243        movl     %ecx,(%rsi,%rdx,8)
244        jmp      4f
2452:      movq     -8(%rdi,%rdx,8),%rcx
246        movq     %rcx,-8(%rsi,%rdx,8)
247        subq     $1,%rdx
248        jnz      2b
249        ret
250        .align   16
2513:      movq     24(%rdi,%rdx,8),%rcx
252        movq     %rcx,24(%rsi,%rdx,8)
253        movq     16(%rdi,%rdx,8),%rcx
254        movq     %rcx,16(%rsi,%rdx,8)
255        movq     8(%rdi,%rdx,8),%rcx
256        movq     %rcx,8(%rsi,%rdx,8)
257        movq     (%rdi,%rdx,8),%rcx
258        movq     %rcx,(%rsi,%rdx,8)
2594:      subq     $4,%rdx
260        jge      3b
261        addq     $4,%rdx
262        jg       2b
263        ret
264
265        / Support for void Copy::arrayof_conjoint_jints(jint* from,
266        /                                               jint* to,
267        /                                               size_t count)
268        / Equivalent to
269        /   conjoint_jints_atomic
270        /
271        / If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
272        / the hardware handle it.  The two dwords within qwords that span
273        / cache line boundaries will still be loaded and stored atomically.
274        /
275        / rdi - from
276        / rsi - to
277        / rdx - count, treated as ssize_t
278        /
279        .align   16
280_Copy_arrayof_conjoint_jints:
281_Copy_conjoint_jints_atomic:
282        movq     %rdx,%r8             / dword count
283        shrq     %rdx                 / qword count
284        cmpq     %rdi,%rsi
285        leaq     -4(%rdi,%r8,4),%rax  / from + dcount*4 - 4
286        jbe      aci_CopyRight
287        cmpq     %rax,%rsi
288        jbe      aci_CopyLeft
289aci_CopyRight:
290        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
291        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
292        negq     %rdx
293        jmp      5f
294        .align   16
2951:      movq     8(%rax,%rdx,8),%rsi
296        movq     %rsi,8(%rcx,%rdx,8)
297        addq     $1,%rdx
298        jnz       1b
2992:      testq    $1,%r8               / check for trailing dword
300        jz       3f
301        movl     8(%rax),%esi         / copy trailing dword
302        movl     %esi,8(%rcx)
3033:      ret
304        .align   16
3054:      movq     -24(%rax,%rdx,8),%rsi
306        movq     %rsi,-24(%rcx,%rdx,8)
307        movq     -16(%rax,%rdx,8),%rsi
308        movq     %rsi,-16(%rcx,%rdx,8)
309        movq     -8(%rax,%rdx,8),%rsi
310        movq     %rsi,-8(%rcx,%rdx,8)
311        movq     (%rax,%rdx,8),%rsi
312        movq     %rsi,(%rcx,%rdx,8)
3135:      addq     $4,%rdx
314        jle      4b
315        subq     $4,%rdx
316        jl       1b
317        jmp      2b
318aci_CopyLeft:
319        testq    $1,%r8               / check for trailing dword
320        jz       3f
321        movl     -4(%rdi,%r8,4),%ecx  / copy trailing dword
322        movl     %ecx,-4(%rsi,%r8,4)
323        jmp      3f
3241:      movq     -8(%rdi,%rdx,8),%rcx
325        movq     %rcx,-8(%rsi,%rdx,8)
326        subq     $1,%rdx
327        jnz      1b
328        ret
329        .align   16
3302:      movq     24(%rdi,%rdx,8),%rcx
331        movq     %rcx,24(%rsi,%rdx,8)
332        movq     16(%rdi,%rdx,8),%rcx
333        movq     %rcx,16(%rsi,%rdx,8)
334        movq     8(%rdi,%rdx,8),%rcx
335        movq     %rcx,8(%rsi,%rdx,8)
336        movq     (%rdi,%rdx,8),%rcx
337        movq     %rcx,(%rsi,%rdx,8)
3383:      subq     $4,%rdx
339        jge      2b
340        addq     $4,%rdx
341        jg       1b
342        ret
343
344        / Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
345        /                                                jlong* to,
346        /                                                size_t count)
347        / Equivalent to
348        /   conjoint_jlongs_atomic
349        /   arrayof_conjoint_oops
350        /   conjoint_oops_atomic
351        /
352        / rdi - from
353        / rsi - to
354        / rdx - count, treated as ssize_t
355        /
356        .align   16
357_Copy_arrayof_conjoint_jlongs:
358_Copy_conjoint_jlongs_atomic:
359        cmpq     %rdi,%rsi
360        leaq     -8(%rdi,%rdx,8),%rax / from + count*8 - 8
361        jbe      acl_CopyRight
362        cmpq     %rax,%rsi
363        jbe      acl_CopyLeft
364acl_CopyRight:
365        leaq     -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
366        negq     %rdx
367        jmp      3f
3681:      movq     8(%rax,%rdx,8),%rsi
369        movq     %rsi,8(%rcx,%rdx,8)
370        addq     $1,%rdx
371        jnz      1b
372        ret
373        .align   16
3742:      movq     -24(%rax,%rdx,8),%rsi
375        movq     %rsi,-24(%rcx,%rdx,8)
376        movq     -16(%rax,%rdx,8),%rsi
377        movq     %rsi,-16(%rcx,%rdx,8)
378        movq     -8(%rax,%rdx,8),%rsi
379        movq     %rsi,-8(%rcx,%rdx,8)
380        movq     (%rax,%rdx,8),%rsi
381        movq     %rsi,(%rcx,%rdx,8)
3823:      addq     $4,%rdx
383        jle      2b
384        subq     $4,%rdx
385        jl       1b
386        ret
3874:      movq     -8(%rdi,%rdx,8),%rcx
388        movq     %rcx,-8(%rsi,%rdx,8)
389        subq     $1,%rdx
390        jnz      4b
391        ret
392        .align   16
3935:      movq     24(%rdi,%rdx,8),%rcx
394        movq     %rcx,24(%rsi,%rdx,8)
395        movq     16(%rdi,%rdx,8),%rcx
396        movq     %rcx,16(%rsi,%rdx,8)
397        movq     8(%rdi,%rdx,8),%rcx
398        movq     %rcx,8(%rsi,%rdx,8)
399        movq     (%rdi,%rdx,8),%rcx
400        movq     %rcx,(%rsi,%rdx,8)
401acl_CopyLeft:
402        subq     $4,%rdx
403        jge      5b
404        addq     $4,%rdx
405        jg       4b
406        ret
407