# bsd_x86_64.s revision 4965:980532a806a5
#
# Copyright (c) 2004, 2007, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#

        # Syntax: AT&T operand order (source first, destination last).
        # This file must be run through the C preprocessor before it is
        # assembled, because it relies on the two macros defined below:
        #
        #   SYMBOL    wraps every exported label.  On __APPLE__ it pastes
        #             a leading underscore onto the name, otherwise it
        #             yields the name unchanged.
        #   ELF_TYPE  emits a .type directive for the symbol.  On
        #             __APPLE__ it expands to nothing.

#ifdef __APPLE__
# Darwin uses _ prefixed global symbols
#define SYMBOL(s) _ ## s
#define ELF_TYPE(name, description)
#else
#define SYMBOL(s) s
#define ELF_TYPE(name, description) .type name,description
#endif

        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.
# Exported copy entry points.  Every name is wrapped in the SYMBOL macro.

        .globl SYMBOL(_Copy_arrayof_conjoint_bytes)
        .globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
        .globl SYMBOL(_Copy_conjoint_jshorts_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jints)
        .globl SYMBOL(_Copy_conjoint_jints_atomic)
        .globl SYMBOL(_Copy_arrayof_conjoint_jlongs)
        .globl SYMBOL(_Copy_conjoint_jlongs_atomic)

        .text

        # SpinPause
        #
        # Issues a spin-wait hint (rep followed by nop is the encoding
        # of the PAUSE instruction) and returns 1 in rax.
        # Reads no argument registers and writes only rax.
        #
        .globl SYMBOL(SpinPause)
        .p2align 4,,15
        ELF_TYPE(SpinPause,@function)
SYMBOL(SpinPause):
        rep
        nop
        movq     $1, %rax
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # The two ranges may overlap.  The copy direction is chosen with
        # unsigned compares:
        #   to <= from                    ascending copy  (acb_CopyRight)
        #   from < to <= from + count - 1 descending copy (acb_CopyLeft)
        #   to beyond the last source byte ascending copy (acb_CopyRight)
        #
        # The bulk is moved as qwords, four per iteration of the unrolled
        # loop and then one at a time.  Bits 2, 1 and 0 of the byte count
        # select a trailing dword, word and byte.
        #
        # Writes rax, rcx, rdx, rsi, r8 and the flags.  Does not touch
        # the stack.  Returns nothing meaningful in rax.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
SYMBOL(_Copy_arrayof_conjoint_bytes):
        movq     %rdx,%r8             # byte count
        shrq     $3,%rdx              # qword count
        cmpq     %rdi,%rsi
        leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
        jbe      acb_CopyRight        # flags are from cmpq (leaq keeps them)
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft
acb_CopyRight:
        # rax and rcx address the last whole qword of each range and
        # rdx runs from -qcount up to zero.
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx
        jmp      7f
        .p2align 4,,15
1:      movq     8(%rax,%rdx,8),%rsi  # one qword per iteration
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $4,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      #  can't use it as a base register
3:      testq    $2,%r8               # check for trailing word
        jz       4f
        movw     8(%rax),%si          # copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
4:      testq    $1,%r8               # check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al   # copy trailing byte
        movb     %al,8(%rcx)
5:      ret
        .p2align 4,,15
6:      movq     -24(%rax,%rdx,8),%rsi # four qwords per iteration
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
7:      addq     $4,%rdx              # at least four qwords left?
        jle      6b
        subq     $4,%rdx              # undo the bias
        jl       1b                   # one to three qwords left
        jmp      2b                   # no qwords left, do the tail
acb_CopyLeft:
        # Descending copy: the tail pieces at the high end go first,
        # then the qwords from the highest index down.
        testq    $1,%r8               # check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8               # adjust for possible trailing word
1:      testq    $2,%r8               # check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8               # check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f
        .p2align 4,,15
3:      movq     -8(%rdi,%rdx,8),%rcx # one qword per iteration
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .p2align 4,,15
4:      movq     24(%rdi,%rdx,8),%rcx # four qwords per iteration
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
5:      subq     $4,%rdx              # at least four qwords left?
        jge      4b
        addq     $4,%rdx              # undo the bias
        jg       3b                   # one to three qwords left
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        # Equivalent to
        #   conjoint_jshorts_atomic
        #
        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        # let the hardware handle it.  The two or four words within dwords
        # or qwords that span cache line boundaries will still be loaded
        # and stored atomically.
#
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # jshorts copy: count is a number of 16-bit elements.  The two
        # labels below name the same code.  The ranges may overlap and
        # the direction is chosen with unsigned compares:
        #   to <= from                           ascending  (acs_CopyRight)
        #   from < to <= address of last element descending (acs_CopyLeft)
        #   to beyond the last source element    ascending  (acs_CopyRight)
        # The bulk is moved as qwords.  Bits 1 and 0 of the count select
        # a trailing dword and a trailing word.
        #
        # Writes rax, rcx, rdx, rsi, r8 and the flags.  Does not touch
        # the stack.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function)
        ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jshorts):
SYMBOL(_Copy_conjoint_jshorts_atomic):
        movq     %rdx,%r8             # word count
        shrq     $2,%rdx              # qword count
        cmpq     %rdi,%rsi
        leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
        jbe      acs_CopyRight        # flags are from cmpq (leaq keeps them)
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft
acs_CopyRight:
        # rax and rcx address the last whole qword of each range and
        # rdx runs from -qcount up to zero.
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx
        jmp      6f
1:      movq     8(%rax,%rdx,8),%rsi  # one qword per iteration
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $2,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      #  can't use it as a base register
3:      testq    $1,%r8               # check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si   # copy trailing word
        movw     %si,8(%rcx)
4:      ret
        .p2align 4,,15
5:      movq     -24(%rax,%rdx,8),%rsi # four qwords per iteration
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
6:      addq     $4,%rdx              # at least four qwords left?
        jle      5b
        subq     $4,%rdx              # undo the bias
        jl       1b                   # one to three qwords left
        jmp      2b                   # no qwords left, do the tail
acs_CopyLeft:
        # Descending copy: tail pieces at the high end first, then the
        # qwords from the highest index down.
        testq    $1,%r8               # check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8               # check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f
2:      movq     -8(%rdi,%rdx,8),%rcx # one qword per iteration
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .p2align 4,,15
3:      movq     24(%rdi,%rdx,8),%rcx # four qwords per iteration
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx              # at least four qwords left?
        jge      3b
        addq     $4,%rdx              # undo the bias
        jg       2b                   # one to three qwords left
        ret

        # Support for void Copy::arrayof_conjoint_jints(jint* from,
        #                                               jint* to,
        #                                               size_t count)
        # Equivalent to
        #   conjoint_jints_atomic
        #
        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
        # the hardware handle it.  The two dwords within qwords that span
        # cache line boundaries will still be loaded and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # count is a number of 32-bit elements.  Direction selection is
        # the same unsigned test as in the other routines of this file:
        # descending (aci_CopyLeft) only when 'to' lies above 'from' and
        # at or below the last source element, ascending otherwise.
        # Bit 0 of the count selects a trailing dword.
        #
        # Writes rax, rcx, rdx, rsi, r8 and the flags.  Does not touch
        # the stack.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jints,@function)
        ELF_TYPE(_Copy_conjoint_jints_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jints):
SYMBOL(_Copy_conjoint_jints_atomic):
        movq     %rdx,%r8             # dword count
        shrq     %rdx                 # qword count
        cmpq     %rdi,%rsi
        leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
        jbe      aci_CopyRight        # flags are from cmpq (leaq keeps them)
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx                 # rdx runs from -qcount up to zero
        jmp      5f
        .p2align 4,,15
1:      movq     8(%rax,%rdx,8),%rsi  # one qword per iteration
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .p2align 4,,15
4:      movq     -24(%rax,%rdx,8),%rsi # four qwords per iteration
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
5:      addq     $4,%rdx              # at least four qwords left?
        jle      4b
        subq     $4,%rdx              # undo the bias
        jl       1b                   # one to three qwords left
        jmp      2b                   # no qwords left, do the tail
aci_CopyLeft:
        testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f
1:      movq     -8(%rdi,%rdx,8),%rcx # one qword per iteration
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
2:      movq     24(%rdi,%rdx,8),%rcx # four qwords per iteration
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx              # at least four qwords left?
        jge      2b
        addq     $4,%rdx              # undo the bias
        jg       1b                   # one to three qwords left
        ret

        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
        #                                                jlong* to,
        #                                                size_t count)
        # Equivalent to
        #   conjoint_jlongs_atomic
        #   arrayof_conjoint_oops
        #   conjoint_oops_atomic
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # count is a number of 64-bit elements, so there is no tail to
        # handle.  Descending copy (acl_CopyLeft) is used only when 'to'
        # lies above 'from' and at or below the last source element,
        # ascending copy otherwise.  Note that acl_CopyLeft labels the
        # loop test that follows the two descending loop bodies.
        #
        # Writes rax, rcx, rdx, rsi and the flags.  Does not touch the
        # stack.
        #
        .p2align 4,,15
        ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function)
        ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function)
SYMBOL(_Copy_arrayof_conjoint_jlongs):
SYMBOL(_Copy_conjoint_jlongs_atomic):
        cmpq     %rdi,%rsi
        leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
        jbe      acl_CopyRight        # flags are from cmpq (leaq keeps them)
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
        negq     %rdx                 # rdx runs from -count up to zero
        jmp      3f
1:      movq     8(%rax,%rdx,8),%rsi  # one qword per iteration
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
2:      movq     -24(%rax,%rdx,8),%rsi # four qwords per iteration
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
3:      addq     $4,%rdx              # at least four qwords left?
        jle      2b
        subq     $4,%rdx              # undo the bias
        jl       1b                   # one to three qwords left
        ret
4:      movq     -8(%rdi,%rdx,8),%rcx # one qword per iteration
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .p2align 4,,15
5:      movq     24(%rdi,%rdx,8),%rcx # four qwords per iteration
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        subq     $4,%rdx              # at least four qwords left?
        jge      5b
        addq     $4,%rdx              # undo the bias
        jg       4b                   # one to three qwords left
        ret