/*
 * PowerPC64 atomic bit operations.
 * Dave Engebretsen, Todd Inglett, Don Reed, Pat McCarthy, Peter Bergner,
 * Anton Blanchard
 *
 * Originally taken from the 32b PPC code.  Modified to use 64b values for
 * the various counters & memory references.
 *
 * Bitops are odd when viewed on big-endian systems.  They were designed
 * on little endian so the size of the bitset doesn't matter (low order bytes
 * come first) as long as the bit in question is valid.
 *
 * Bits are "tested" often using the C expression (val & (1<<nr)) so we do
 * our best to stay compatible with that.  The assumption is that val will
 * be unsigned long for such tests.  As such, we assume the bits are stored
 * as an array of unsigned long (the usual case is a single unsigned long,
 * of course).  Here's an example bitset with bit numbering:
 *
 *   |63..........0|127........64|191.......128|255.......192|
 *
 * This leads to a problem. If an int, short or char is passed as a bitset
 * it will be a bad memory reference since we want to store in chunks
 * of unsigned long (64 bits here) size.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
29 */ 30 31#ifndef _PPC64_BITOPS_H 32#define _PPC64_BITOPS_H 33 34#ifdef __KERNEL__ 35 36#include <asm/byteorder.h> 37#include <asm/memory.h> 38 39/* 40 * clear_bit doesn't imply a memory barrier 41 */ 42#define smp_mb__before_clear_bit() smp_mb() 43#define smp_mb__after_clear_bit() smp_mb() 44 45static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr) 46{ 47 return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63))); 48} 49 50static __inline__ void set_bit(unsigned long nr, volatile void *addr) 51{ 52 unsigned long old; 53 unsigned long mask = 1UL << (nr & 0x3f); 54 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 55 56 __asm__ __volatile__( 57"1: ldarx %0,0,%3 # set_bit\n\ 58 or %0,%0,%2\n\ 59 stdcx. %0,0,%3\n\ 60 bne- 1b" 61 : "=&r" (old), "=m" (*p) 62 : "r" (mask), "r" (p), "m" (*p) 63 : "cc"); 64} 65 66static __inline__ void clear_bit(unsigned long nr, volatile void *addr) 67{ 68 unsigned long old; 69 unsigned long mask = 1UL << (nr & 0x3f); 70 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 71 72 __asm__ __volatile__( 73"1: ldarx %0,0,%3 # clear_bit\n\ 74 andc %0,%0,%2\n\ 75 stdcx. %0,0,%3\n\ 76 bne- 1b" 77 : "=&r" (old), "=m" (*p) 78 : "r" (mask), "r" (p), "m" (*p) 79 : "cc"); 80} 81 82static __inline__ void change_bit(unsigned long nr, volatile void *addr) 83{ 84 unsigned long old; 85 unsigned long mask = 1UL << (nr & 0x3f); 86 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 87 88 __asm__ __volatile__( 89"1: ldarx %0,0,%3 # change_bit\n\ 90 xor %0,%0,%2\n\ 91 stdcx. %0,0,%3\n\ 92 bne- 1b" 93 : "=&r" (old), "=m" (*p) 94 : "r" (mask), "r" (p), "m" (*p) 95 : "cc"); 96} 97 98static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr) 99{ 100 unsigned long old, t; 101 unsigned long mask = 1UL << (nr & 0x3f); 102 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 103 104 __asm__ __volatile__( 105 EIEIO_ON_SMP 106"1: ldarx %0,0,%3 # test_and_set_bit\n\ 107 or %1,%0,%2 \n\ 108 stdcx. 
%1,0,%3 \n\ 109 bne- 1b" 110 ISYNC_ON_SMP 111 : "=&r" (old), "=&r" (t) 112 : "r" (mask), "r" (p) 113 : "cc", "memory"); 114 115 return (old & mask) != 0; 116} 117 118static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr) 119{ 120 unsigned long old, t; 121 unsigned long mask = 1UL << (nr & 0x3f); 122 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 123 124 __asm__ __volatile__( 125 EIEIO_ON_SMP 126"1: ldarx %0,0,%3 # test_and_clear_bit\n\ 127 andc %1,%0,%2\n\ 128 stdcx. %1,0,%3\n\ 129 bne- 1b" 130 ISYNC_ON_SMP 131 : "=&r" (old), "=&r" (t) 132 : "r" (mask), "r" (p) 133 : "cc", "memory"); 134 135 return (old & mask) != 0; 136} 137 138static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr) 139{ 140 unsigned long old, t; 141 unsigned long mask = 1UL << (nr & 0x3f); 142 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 143 144 __asm__ __volatile__( 145 EIEIO_ON_SMP 146"1: ldarx %0,0,%3 # test_and_change_bit\n\ 147 xor %1,%0,%2\n\ 148 stdcx. 
%1,0,%3\n\ 149 bne- 1b" 150 ISYNC_ON_SMP 151 : "=&r" (old), "=&r" (t) 152 : "r" (mask), "r" (p) 153 : "cc", "memory"); 154 155 return (old & mask) != 0; 156} 157 158/* 159 * non-atomic versions 160 */ 161static __inline__ void __set_bit(unsigned long nr, volatile void *addr) 162{ 163 unsigned long mask = 1UL << (nr & 0x3f); 164 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 165 166 *p |= mask; 167} 168 169static __inline__ void __clear_bit(unsigned long nr, volatile void *addr) 170{ 171 unsigned long mask = 1UL << (nr & 0x3f); 172 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 173 174 *p &= ~mask; 175} 176 177static __inline__ void __change_bit(unsigned long nr, volatile void *addr) 178{ 179 unsigned long mask = 1UL << (nr & 0x3f); 180 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 181 182 *p ^= mask; 183} 184 185static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr) 186{ 187 unsigned long mask = 1UL << (nr & 0x3f); 188 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 189 unsigned long old = *p; 190 191 *p = old | mask; 192 return (old & mask) != 0; 193} 194 195static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr) 196{ 197 unsigned long mask = 1UL << (nr & 0x3f); 198 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 199 unsigned long old = *p; 200 201 *p = old & ~mask; 202 return (old & mask) != 0; 203} 204 205static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr) 206{ 207 unsigned long mask = 1UL << (nr & 0x3f); 208 unsigned long *p = ((unsigned long *)addr) + (nr >> 6); 209 unsigned long old = *p; 210 211 *p = old ^ mask; 212 return (old & mask) != 0; 213} 214 215/* 216 * Return the zero-based bit position (from RIGHT TO LEFT, 63 -> 0) of the 217 * most significant (left-most) 1-bit in a double word. 
218 */ 219static __inline__ int __ilog2(unsigned long x) 220{ 221 int lz; 222 223 asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x)); 224 return 63 - lz; 225} 226 227/* Return the zero-based bit position 228 * from RIGHT TO LEFT 63 --> 0 229 * of the most significant (left-most) 1-bit in an 8-byte area. 230 */ 231static __inline__ long cnt_trailing_zeros(unsigned long mask) 232{ 233 long cnt; 234 235 asm( 236" addi %0,%1,-1 \n\ 237 andc %0,%0,%1 \n\ 238 cntlzd %0,%0 \n\ 239 subfic %0,%0,64" 240 : "=r" (cnt) 241 : "r" (mask)); 242 return cnt; 243} 244 245 246 247/* 248 * ffz = Find First Zero in word. Undefined if no zero exists, 249 * Determines the bit position of the LEAST significant 250 * (rightmost) 0 bit in the specified DOUBLE-WORD. 251 * The returned bit position will be zero-based, starting 252 * from the right side (63 - 0). 253 * the code should check against ~0UL first.. 254 */ 255static __inline__ unsigned long ffz(unsigned long x) 256{ 257 u32 tempRC; 258 259 /* Change all of x's 1s to 0s and 0s to 1s in x. 260 * And insure at least 1 zero exists in the 8 byte area. 261 */ 262 if ((x = ~x) == 0) 263 /* no zero exists anywhere in the 8 byte area. */ 264 return 64; 265 266 /* Calculate the bit position of the least significant '1' bit in x 267 * (since x has been changed this will actually be the least 268 * significant '0' bit in the original x). 269 * Note: (x & -x) gives us a mask that is the LEAST significant 270 * (RIGHT-most) 1-bit of the value in x. 271 */ 272 tempRC = __ilog2(x & -x); 273 274 return tempRC; 275} 276 277/* 278 * ffs: find first bit set. This is defined the same way as 279 * the libc and compiler builtin ffs routines, therefore 280 * differs in spirit from the above ffz (man ffs). 281 */ 282static __inline__ int ffs(int x) 283{ 284 int result = ffz(~x); 285 return x ? result+1 : 0; 286} 287 288/* 289 * hweightN: returns the hamming weight (i.e. 
the number 290 * of bits set) of a N-bit word 291 */ 292#define hweight32(x) generic_hweight32(x) 293#define hweight16(x) generic_hweight16(x) 294#define hweight8(x) generic_hweight8(x) 295 296extern unsigned long find_next_zero_bit(void * addr, unsigned long size, 297 unsigned long offset); 298/* 299 * The optimizer actually does good code for this case.. 300 */ 301#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) 302 303/* Bitmap functions for the ext2 filesystem. */ 304#define _EXT2_HAVE_ASM_BITOPS_ 305 306static __inline__ int ext2_set_bit(int nr, void* addr) 307{ 308 /* This method needs to take into account the fact that the ext2 file system represents 309 * it's bitmaps as "little endian" unsigned integers. 310 * Note: this method is not atomic, but ext2 does not need it to be. 311 */ 312 int mask; 313 int oldbit; 314 unsigned char* ADDR = (unsigned char*) addr; 315 316 /* Determine the BYTE containing the specified bit 317 * (nr) - important as if we go to a byte there are no 318 * little endian concerns. 319 */ 320 ADDR += nr >> 3; 321 mask = 1 << (nr & 0x07); /* Create a mask to the bit within this byte. */ 322 oldbit = *ADDR & mask; /* Save the bit's previous value. */ 323 *ADDR |= mask; /* Turn the bit on. */ 324 return oldbit; /* Return the bit's previous value. */ 325} 326 327static __inline__ int ext2_clear_bit(int nr, void* addr) 328{ 329 /* This method needs to take into account the fact that the ext2 file system represents 330 * | it's bitmaps as "little endian" unsigned integers. 331 * Note: this method is not atomic, but ext2 does not need it to be. 332 */ 333 int mask; 334 int oldbit; 335 unsigned char* ADDR = (unsigned char*) addr; 336 337 /* Determine the BYTE containing the specified bit (nr) 338 * - important as if we go to a byte there are no little endian concerns. 339 */ 340 ADDR += nr >> 3; 341 mask = 1 << (nr & 0x07); /* Create a mask to the bit within this byte. 
*/ 342 oldbit = *ADDR & mask; /* Save the bit's previous value. */ 343 *ADDR = *ADDR & ~mask; /* Turn the bit off. */ 344 return oldbit; /* Return the bit's previous value. */ 345} 346 347static __inline__ int ext2_test_bit(int nr, __const__ void * addr) 348{ 349 /* This method needs to take into account the fact that the ext2 file system represents 350 * | it's bitmaps as "little endian" unsigned integers. 351 * Determine the BYTE containing the specified bit (nr), 352 * then shift to the right the correct number of bits and return that bit's value. 353 */ 354 __const__ unsigned char *ADDR = (__const__ unsigned char *) addr; 355 return (ADDR[nr >> 3] >> (nr & 7)) & 1; 356} 357 358/* Returns the bit position of the most significant 1 bit in a WORD. */ 359static __inline__ int ext2_ilog2(unsigned int x) 360{ 361 int lz; 362 363 asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x)); 364 return 31 - lz; 365} 366 367/* ext2_ffz = ext2's Find First Zero. 368 * Determines the bit position of the LEAST significant (rightmost) 0 bit in the specified WORD. 369 * The returned bit position will be zero-based, starting from the right side (31 - 0). 370 */ 371static __inline__ int ext2_ffz(unsigned int x) 372{ 373 u32 tempRC; 374 /* Change all of x's 1s to 0s and 0s to 1s in x. And insure at least 1 zero exists in the word. */ 375 if ((x = ~x) == 0) 376 /* no zero exists anywhere in the 4 byte area. */ 377 return 32; 378 /* Calculate the bit position of the least significant '1' bit in x 379 * (since x has been changed this will actually be the least 380 * significant '0' bit in the original x). 381 * Note: (x & -x) gives us a mask that is the LEAST significant 382 * (RIGHT-most) 1-bit of the value in x. 
383 */ 384 tempRC = ext2_ilog2(x & -x); 385 return tempRC; 386} 387 388static __inline__ u32 ext2_find_next_zero_bit(void* addr, u32 size, u32 offset) 389{ 390 /* This method needs to take into account the fact that the ext2 file system represents 391 * | it's bitmaps as "little endian" unsigned integers. 392 */ 393 unsigned int *p = ((unsigned int *) addr) + (offset >> 5); 394 unsigned int result = offset & ~31; 395 unsigned int tmp; 396 397 if (offset >= size) 398 return size; 399 size -= result; 400 offset &= 31; 401 if (offset) { 402 tmp = cpu_to_le32p(p++); 403 tmp |= ~0U >> (32-offset); /* bug or feature ? */ 404 if (size < 32) 405 goto found_first; 406 if (tmp != ~0) 407 goto found_middle; 408 size -= 32; 409 result += 32; 410 } 411 while (size >= 32) { 412 if ((tmp = cpu_to_le32p(p++)) != ~0) 413 goto found_middle; 414 result += 32; 415 size -= 32; 416 } 417 if (!size) 418 return result; 419 tmp = cpu_to_le32p(p); 420found_first: 421 tmp |= ~0 << size; 422 if (tmp == ~0) /* Are any bits zero? */ 423 return result + size; /* Nope. */ 424found_middle: 425 return result + ext2_ffz(tmp); 426} 427 428#define ext2_find_first_zero_bit(addr, size) ext2_find_next_zero_bit((addr), (size), 0) 429 430#endif /* __KERNEL__ */ 431#endif /* _PPC64_BITOPS_H */ 432