/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class atomic

inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }

inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""
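
// Illustrative sketch only (not part of this header's interface): the
// strasm_* macros are plain string literals, so they are meant to be spliced
// into GCC inline-asm templates by string concatenation.  A hypothetical
// release-store of a flag word could look like this:
//
//   __asm__ __volatile__ (
//     strasm_release                 // expands to "\n  lwsync  \n"
//     "   stw  %0, 0(%1)   \n"
//     : : "r" (value), "b" (dest) : "memory");
//
// The real entry points below use the same pattern, e.g. strasm_lwsync before
// and strasm_isync after the load-reserve/store-conditional loops.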

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};
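
// A usage sketch (hypothetical caller; the dispatch from Atomic::add through
// the AddAndFetch helper lives in atomic.hpp, not here):
//
//   volatile jint _counter = 0;
//   jint result = Atomic::add(1, &_counter);   // reaches PlatformAdd<4> below
//
// Both specializations pair a leading lwsync with a trailing isync around the
// load-reserve/store-conditional loop.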

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}

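// The inc/dec variants below are bracketed with strasm_nobarrier (which
// expands to the empty string), i.e. they are atomic read-modify-writes with
// no implied memory ordering.  A hedged usage sketch (hypothetical counter,
// not part of this file):
//
//   volatile jint _events = 0;
//   Atomic::inc(&_events);   // atomic, but callers needing ordering must
//                            // add their own barriers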
inline void Atomic::inc    (volatile jint*     dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile void*     dest) {
  inc_ptr((volatile intptr_t*)dest);
}


inline void Atomic::dec    (volatile jint*     dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile void*     dest) {
  dec_ptr((volatile intptr_t*)dest);
}

inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (jint) old_value;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {

  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (intptr_t) old_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}
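
// The two helpers above bracket each compare-and-exchange with a full sync
// unless the caller asked for memory_order_relaxed.  A hedged sketch of a
// relaxed call (hypothetical variable, assuming the Atomic::cmpxchg front end
// in atomic.hpp forwards the order argument unchanged):
//
//   volatile jint _flag = 0;
//   jint prev = Atomic::cmpxchg(1, &_flag, 0, memory_order_relaxed);
//   // no sync is emitted before or after the ll/sc loop in this case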

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;
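
  // Worked example (illustrative only, no extra semantics): on a little-endian
  // system a byte at dest_base + 1 gives shift_amount == 8.  If the loaded
  // word still holds compare_value in that byte lane, then
  //   value32 ^ xor_value == value32 ^ ((compare ^ exchange) << 8)
  // turns 'compare' into 'exchange' in the selected lane and leaves the other
  // three bytes untouched, which is exactly the byte replacement performed by
  // the xor in the loop below.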

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}
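
// A minimal usage sketch (hypothetical caller, assuming the generic
// Atomic::cmpxchg front end in atomic.hpp dispatches by operand size):
//
//   volatile jlong _state = 0;
//   jlong prev = Atomic::cmpxchg((jlong)1, &_state, (jlong)0);
//   // prev == 0 iff this thread performed the 0 -> 1 transition;
//   // PlatformCmpxchg<8> above supplies the ll/sc loop and barriers.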

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP