// copy_linux_x86.inline.hpp revision 1472:c18cbe5936b8
/*
 * Copyright (c) 2003, 2004, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

25static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
26#ifdef AMD64
27  (void)memmove(to, from, count * HeapWordSize);
28#else
29  // Same as pd_aligned_conjoint_words, except includes a zero-count check.
30  intx temp;
31  __asm__ volatile("        testl   %6,%6         ;"
32                   "        jz      7f            ;"
33                   "        cmpl    %4,%5         ;"
34                   "        leal    -4(%4,%6,4),%3;"
35                   "        jbe     1f            ;"
36                   "        cmpl    %7,%5         ;"
37                   "        jbe     4f            ;"
38                   "1:      cmpl    $32,%6        ;"
39                   "        ja      3f            ;"
40                   "        subl    %4,%1         ;"
41                   "2:      movl    (%4),%3       ;"
42                   "        movl    %7,(%5,%4,1)  ;"
43                   "        addl    $4,%0         ;"
44                   "        subl    $1,%2          ;"
45                   "        jnz     2b            ;"
46                   "        jmp     7f            ;"
47                   "3:      rep;    smovl         ;"
48                   "        jmp     7f            ;"
49                   "4:      cmpl    $32,%2        ;"
50                   "        movl    %7,%0         ;"
51                   "        leal    -4(%5,%6,4),%1;"
52                   "        ja      6f            ;"
53                   "        subl    %4,%1         ;"
54                   "5:      movl    (%4),%3       ;"
55                   "        movl    %7,(%5,%4,1)  ;"
56                   "        subl    $4,%0         ;"
57                   "        subl    $1,%2          ;"
58                   "        jnz     5b            ;"
59                   "        jmp     7f            ;"
60                   "6:      std                   ;"
61                   "        rep;    smovl         ;"
62                   "        cld                   ;"
63                   "7:      nop                    "
64                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
65                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
66                   : "memory", "flags");
67#endif // AMD64
68}
69
70static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
71#ifdef AMD64
72  switch (count) {
73  case 8:  to[7] = from[7];
74  case 7:  to[6] = from[6];
75  case 6:  to[5] = from[5];
76  case 5:  to[4] = from[4];
77  case 4:  to[3] = from[3];
78  case 3:  to[2] = from[2];
79  case 2:  to[1] = from[1];
80  case 1:  to[0] = from[0];
81  case 0:  break;
82  default:
83    (void)memcpy(to, from, count * HeapWordSize);
84    break;
85  }
86#else
87  // Same as pd_aligned_disjoint_words, except includes a zero-count check.
88  intx temp;
89  __asm__ volatile("        testl   %6,%6       ;"
90                   "        jz      3f          ;"
91                   "        cmpl    $32,%6      ;"
92                   "        ja      2f          ;"
93                   "        subl    %4,%1       ;"
94                   "1:      movl    (%4),%3     ;"
95                   "        movl    %7,(%5,%4,1);"
96                   "        addl    $4,%0       ;"
97                   "        subl    $1,%2        ;"
98                   "        jnz     1b          ;"
99                   "        jmp     3f          ;"
100                   "2:      rep;    smovl       ;"
101                   "3:      nop                  "
102                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
103                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
104                   : "memory", "cc");
105#endif // AMD64
106}
107
108static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
109#ifdef AMD64
110  switch (count) {
111  case 8:  to[7] = from[7];
112  case 7:  to[6] = from[6];
113  case 6:  to[5] = from[5];
114  case 5:  to[4] = from[4];
115  case 4:  to[3] = from[3];
116  case 3:  to[2] = from[2];
117  case 2:  to[1] = from[1];
118  case 1:  to[0] = from[0];
119  case 0:  break;
120  default:
121    while (count-- > 0) {
122      *to++ = *from++;
123    }
124    break;
125  }
126#else
127  // pd_disjoint_words is word-atomic in this implementation.
128  pd_disjoint_words(from, to, count);
129#endif // AMD64
130}
131
132static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
133#ifdef AMD64
134  (void)memmove(to, from, count * HeapWordSize);
135#else
136  // Same as pd_conjoint_words, except no zero-count check.
137  intx temp;
138  __asm__ volatile("        cmpl    %4,%5         ;"
139                   "        leal    -4(%4,%6,4),%3;"
140                   "        jbe     1f            ;"
141                   "        cmpl    %7,%5         ;"
142                   "        jbe     4f            ;"
143                   "1:      cmpl    $32,%6        ;"
144                   "        ja      3f            ;"
145                   "        subl    %4,%1         ;"
146                   "2:      movl    (%4),%3       ;"
147                   "        movl    %7,(%5,%4,1)  ;"
148                   "        addl    $4,%0         ;"
149                   "        subl    $1,%2          ;"
150                   "        jnz     2b            ;"
151                   "        jmp     7f            ;"
152                   "3:      rep;    smovl         ;"
153                   "        jmp     7f            ;"
154                   "4:      cmpl    $32,%2        ;"
155                   "        movl    %7,%0         ;"
156                   "        leal    -4(%5,%6,4),%1;"
157                   "        ja      6f            ;"
158                   "        subl    %4,%1         ;"
159                   "5:      movl    (%4),%3       ;"
160                   "        movl    %7,(%5,%4,1)  ;"
161                   "        subl    $4,%0         ;"
162                   "        subl    $1,%2          ;"
163                   "        jnz     5b            ;"
164                   "        jmp     7f            ;"
165                   "6:      std                   ;"
166                   "        rep;    smovl         ;"
167                   "        cld                   ;"
168                   "7:      nop                    "
169                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
170                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
171                   : "memory", "flags");
172#endif // AMD64
173}
174
175static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
176#ifdef AMD64
177  pd_disjoint_words(from, to, count);
178#else
179  // Same as pd_disjoint_words, except no zero-count check.
180  intx temp;
181  __asm__ volatile("        cmpl    $32,%6      ;"
182                   "        ja      2f          ;"
183                   "        subl    %4,%1       ;"
184                   "1:      movl    (%4),%3     ;"
185                   "        movl    %7,(%5,%4,1);"
186                   "        addl    $4,%0       ;"
187                   "        subl    $1,%2        ;"
188                   "        jnz     1b          ;"
189                   "        jmp     3f          ;"
190                   "2:      rep;    smovl       ;"
191                   "3:      nop                  "
192                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
193                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
194                   : "memory", "cc");
195#endif // AMD64
196}
197
198static void pd_conjoint_bytes(void* from, void* to, size_t count) {
199#ifdef AMD64
200  (void)memmove(to, from, count);
201#else
202  intx temp;
203  __asm__ volatile("        testl   %6,%6          ;"
204                   "        jz      13f            ;"
205                   "        cmpl    %4,%5          ;"
206                   "        leal    -1(%4,%6),%3   ;"
207                   "        jbe     1f             ;"
208                   "        cmpl    %7,%5          ;"
209                   "        jbe     8f             ;"
210                   "1:      cmpl    $3,%6          ;"
211                   "        jbe     6f             ;"
212                   "        movl    %6,%3          ;"
213                   "        movl    $4,%2          ;"
214                   "        subl    %4,%2          ;"
215                   "        andl    $3,%2          ;"
216                   "        jz      2f             ;"
217                   "        subl    %6,%3          ;"
218                   "        rep;    smovb          ;"
219                   "2:      movl    %7,%2          ;"
220                   "        shrl    $2,%2          ;"
221                   "        jz      5f             ;"
222                   "        cmpl    $32,%2         ;"
223                   "        ja      4f             ;"
224                   "        subl    %4,%1          ;"
225                   "3:      movl    (%4),%%edx     ;"
226                   "        movl    %%edx,(%5,%4,1);"
227                   "        addl    $4,%0          ;"
228                   "        subl    $1,%2           ;"
229                   "        jnz     3b             ;"
230                   "        addl    %4,%1          ;"
231                   "        jmp     5f             ;"
232                   "4:      rep;    smovl          ;"
233                   "5:      movl    %7,%2          ;"
234                   "        andl    $3,%2          ;"
235                   "        jz      13f            ;"
236                   "6:      xorl    %7,%3          ;"
237                   "7:      movb    (%4,%7,1),%%dl ;"
238                   "        movb    %%dl,(%5,%7,1) ;"
239                   "        addl    $1,%3          ;"
240                   "        subl    $1,%2           ;"
241                   "        jnz     7b             ;"
242                   "        jmp     13f            ;"
243                   "8:      std                    ;"
244                   "        cmpl    $12,%2         ;"
245                   "        ja      9f             ;"
246                   "        movl    %7,%0          ;"
247                   "        leal    -1(%6,%5),%1   ;"
248                   "        jmp     11f            ;"
249                   "9:      xchgl   %3,%2          ;"
250                   "        movl    %6,%0          ;"
251                   "        addl    $1,%2          ;"
252                   "        leal    -1(%7,%5),%1   ;"
253                   "        andl    $3,%2          ;"
254                   "        jz      10f            ;"
255                   "        subl    %6,%3          ;"
256                   "        rep;    smovb          ;"
257                   "10:     movl    %7,%2          ;"
258                   "        subl    $3,%0          ;"
259                   "        shrl    $2,%2          ;"
260                   "        subl    $3,%1          ;"
261                   "        rep;    smovl          ;"
262                   "        andl    $3,%3          ;"
263                   "        jz      12f            ;"
264                   "        movl    %7,%2          ;"
265                   "        addl    $3,%0          ;"
266                   "        addl    $3,%1          ;"
267                   "11:     rep;    smovb          ;"
268                   "12:     cld                    ;"
269                   "13:     nop                    ;"
270                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
271                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
272                   : "memory", "flags", "%edx");
273#endif // AMD64
274}
275
276static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
277  pd_conjoint_bytes(from, to, count);
278}
279
280static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
281  _Copy_conjoint_jshorts_atomic(from, to, count);
282}
283
284static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
285#ifdef AMD64
286  _Copy_conjoint_jints_atomic(from, to, count);
287#else
288  assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
289  // pd_conjoint_words is word-atomic in this implementation.
290  pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
291#endif // AMD64
292}
293
294static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
295#ifdef AMD64
296  _Copy_conjoint_jlongs_atomic(from, to, count);
297#else
298  // Guarantee use of fild/fistp or xmm regs via some asm code, because compilers won't.
299  if (from > to) {
300    while (count-- > 0) {
301      __asm__ volatile("fildll (%0); fistpll (%1)"
302                       :
303                       : "r" (from), "r" (to)
304                       : "memory" );
305      ++from;
306      ++to;
307    }
308  } else {
309    while (count-- > 0) {
310      __asm__ volatile("fildll (%0,%2,8); fistpll (%1,%2,8)"
311                       :
312                       : "r" (from), "r" (to), "r" (count)
313                       : "memory" );
314    }
315  }
316#endif // AMD64
317}
318
319static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
320#ifdef AMD64
321  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
322  _Copy_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
323#else
324  assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
325  // pd_conjoint_words is word-atomic in this implementation.
326  pd_conjoint_words((HeapWord*)from, (HeapWord*)to, count);
327#endif // AMD64
328}
329
330static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
331  _Copy_arrayof_conjoint_bytes(from, to, count);
332}
333
334static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
335  _Copy_arrayof_conjoint_jshorts(from, to, count);
336}
337
338static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
339#ifdef AMD64
340   _Copy_arrayof_conjoint_jints(from, to, count);
341#else
342  pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
343#endif // AMD64
344}
345
346static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
347#ifdef AMD64
348  _Copy_arrayof_conjoint_jlongs(from, to, count);
349#else
350  pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
351#endif // AMD64
352}
353
354static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
355#ifdef AMD64
356  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
357  _Copy_arrayof_conjoint_jlongs(from, to, count);
358#else
359  pd_conjoint_oops_atomic((oop*)from, (oop*)to, count);
360#endif // AMD64
361}
362