/*
 * PowerPC64 atomic bit operations.
 * Dave Engebretsen, Todd Inglett, Don Reed, Pat McCarthy, Peter Bergner,
 * Anton Blanchard
 *
 * Originally taken from the 32b PPC code.  Modified to use 64b values for
 * the various counters & memory references.
 *
 * Bitops are odd when viewed on big-endian systems.  They were designed
 * on little endian so the size of the bitset doesn't matter (low order bytes
 * come first) as long as the bit in question is valid.
 *
 * Bits are often "tested" using the C expression (val & (1<<nr)), so we do
 * our best to stay compatible with that.  The assumption is that val will
 * be unsigned long for such tests.  As such, we assume the bits are stored
 * as an array of unsigned long (the usual case is a single unsigned long,
 * of course).  Here's an example bitset with bit numbering:
 *
 *   |63..........0|127........64|191.......128|255.......192|
 *
 * This leads to a problem.  If an int, short or char is passed as a bitset
 * it will be a bad memory reference since we want to store in chunks
 * of unsigned long (64 bits here) size.
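 *
 * For example, bit 70 lives in the second unsigned long: its word
 * index is 70 >> 6 = 1 and its bit offset is 70 & 63 = 6, so
 * test_bit(70, p) effectively reads (p[1] >> 6) & 1.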
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#ifndef _PPC64_BITOPS_H
#define _PPC64_BITOPS_H

#ifdef __KERNEL__

#include <asm/byteorder.h>
#include <asm/memory.h>

/*
 * clear_bit doesn't imply a memory barrier
 */
#define smp_mb__before_clear_bit()	smp_mb()
#define smp_mb__after_clear_bit()	smp_mb()

static __inline__ int test_bit(unsigned long nr, __const__ volatile void *addr)
{
	return (1UL & (((__const__ long *) addr)[nr >> 6] >> (nr & 63)));
}
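
/*
 * The atomic operations below are all built on the same
 * load-reserved/store-conditional loop: ldarx loads the doubleword
 * and places a reservation on it, and stdcx. stores back only if the
 * reservation is still held, i.e. no other processor has written the
 * doubleword in the meantime.  A failed stdcx. sets CR0 to "not
 * equal" and the bne- retries the whole sequence.
 */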

static __inline__ void set_bit(unsigned long nr, volatile void *addr)
{
	unsigned long old;
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	__asm__ __volatile__(
"1:	ldarx	%0,0,%3		# set_bit\n\
	or	%0,%0,%2\n\
	stdcx.	%0,0,%3\n\
	bne-	1b"
	: "=&r" (old), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc");
}

static __inline__ void clear_bit(unsigned long nr, volatile void *addr)
{
	unsigned long old;
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	__asm__ __volatile__(
"1:	ldarx	%0,0,%3		# clear_bit\n\
	andc	%0,%0,%2\n\
	stdcx.	%0,0,%3\n\
	bne-	1b"
	: "=&r" (old), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc");
}

static __inline__ void change_bit(unsigned long nr, volatile void *addr)
{
	unsigned long old;
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	__asm__ __volatile__(
"1:	ldarx	%0,0,%3		# change_bit\n\
	xor	%0,%0,%2\n\
	stdcx.	%0,0,%3\n\
	bne-	1b"
	: "=&r" (old), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc");
}

static __inline__ int test_and_set_bit(unsigned long nr, volatile void *addr)
{
	unsigned long old, t;
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	__asm__ __volatile__(
	EIEIO_ON_SMP
"1:	ldarx	%0,0,%3		# test_and_set_bit\n\
	or	%1,%0,%2\n\
	stdcx.	%1,0,%3\n\
	bne-	1b"
	ISYNC_ON_SMP
	: "=&r" (old), "=&r" (t)
	: "r" (mask), "r" (p)
	: "cc", "memory");

	return (old & mask) != 0;
}
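
/*
 * Illustrative (hypothetical) use of test_and_set_bit() to claim a
 * flag atomically; `busy' is not a real kernel symbol:
 *
 *	static unsigned long busy;
 *
 *	if (test_and_set_bit(0, &busy))
 *		return -EBUSY;
 *	... do the work ...
 *	smp_mb__before_clear_bit();
 *	clear_bit(0, &busy);
 */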

static __inline__ int test_and_clear_bit(unsigned long nr, volatile void *addr)
{
	unsigned long old, t;
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	__asm__ __volatile__(
	EIEIO_ON_SMP
"1:	ldarx	%0,0,%3		# test_and_clear_bit\n\
	andc	%1,%0,%2\n\
	stdcx.	%1,0,%3\n\
	bne-	1b"
	ISYNC_ON_SMP
	: "=&r" (old), "=&r" (t)
	: "r" (mask), "r" (p)
	: "cc", "memory");

	return (old & mask) != 0;
}

static __inline__ int test_and_change_bit(unsigned long nr, volatile void *addr)
{
	unsigned long old, t;
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	__asm__ __volatile__(
	EIEIO_ON_SMP
"1:	ldarx	%0,0,%3		# test_and_change_bit\n\
	xor	%1,%0,%2\n\
	stdcx.	%1,0,%3\n\
	bne-	1b"
	ISYNC_ON_SMP
	: "=&r" (old), "=&r" (t)
	: "r" (mask), "r" (p)
	: "cc", "memory");

	return (old & mask) != 0;
}

/*
 * Non-atomic versions.  These update the bitmap without any locking,
 * so the caller must provide its own serialization (e.g. the bitmap
 * is protected by a lock, or is not yet visible to other CPUs).
 */
static __inline__ void __set_bit(unsigned long nr, volatile void *addr)
{
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	*p |= mask;
}

static __inline__ void __clear_bit(unsigned long nr, volatile void *addr)
{
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	*p &= ~mask;
}

static __inline__ void __change_bit(unsigned long nr, volatile void *addr)
{
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);

	*p ^= mask;
}

static __inline__ int __test_and_set_bit(unsigned long nr, volatile void *addr)
{
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
	unsigned long old = *p;

	*p = old | mask;
	return (old & mask) != 0;
}

static __inline__ int __test_and_clear_bit(unsigned long nr, volatile void *addr)
{
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
	unsigned long old = *p;

	*p = old & ~mask;
	return (old & mask) != 0;
}

static __inline__ int __test_and_change_bit(unsigned long nr, volatile void *addr)
{
	unsigned long mask = 1UL << (nr & 0x3f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 6);
	unsigned long old = *p;

	*p = old ^ mask;
	return (old & mask) != 0;
}

/*
 * Return the zero-based position (63..0, counting from the least
 * significant bit) of the most significant 1 bit in a doubleword.
 */
static __inline__ int __ilog2(unsigned long x)
{
	int lz;

	asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x));
	return 63 - lz;
}
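
/*
 * Example: __ilog2(0x100) = 8, since cntlzd counts 55 leading zero
 * bits in 0x100 and 63 - 55 = 8.  Note __ilog2(0) comes out as -1
 * (cntlzd of zero is 64), so callers must handle zero themselves.
 */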

/*
 * Return the number of trailing (low-order) zero bits in a doubleword,
 * i.e. the zero-based position of the least significant 1 bit.
 * Returns 64 if no bit is set.
 */
static __inline__ long cnt_trailing_zeros(unsigned long mask)
{
	long cnt;

	asm(
"	addi	%0,%1,-1	\n\
	andc	%0,%0,%1	\n\
	cntlzd	%0,%0		\n\
	subfic	%0,%0,64"
	: "=r" (cnt)
	: "r" (mask));
	return cnt;
}
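
/*
 * Worked example for mask = 0x8 (binary 1000):
 *	addi	gives mask - 1		  = 0111
 *	andc	gives (mask - 1) & ~mask  = 0111
 *	cntlzd	counts 61 leading zeros
 *	subfic	returns 64 - 61		  = 3 trailing zeros
 */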

/*
 * ffz = Find First Zero in a doubleword.  Returns the zero-based
 * position (from the right, 0..63) of the least significant 0 bit,
 * or 64 if no zero bit exists, so callers should check against
 * ~0UL first.
 */
static __inline__ unsigned long ffz(unsigned long x)
{
	u32 tempRC;

	/* Complement x, and ensure at least one zero existed in the
	 * original 8-byte value.
	 */
	if ((x = ~x) == 0)
		/* No zero exists anywhere in the 8 byte area. */
		return 64;

	/* Calculate the bit position of the least significant '1' bit in x
	 * (since x has been complemented this is the least significant
	 * '0' bit in the original x).
	 * Note: (x & -x) gives us a mask that is the LEAST significant
	 * (RIGHT-most) 1-bit of the value in x.
	 */
	tempRC = __ilog2(x & -x);

	return tempRC;
}
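
/*
 * The (x & -x) trick: in two's complement, negating x flips every bit
 * above the lowest set bit, so the AND isolates that bit.  For
 * example, x = 0110 gives -x = ...1010 and x & -x = 0010.  Hence
 * ffz(0xff) = __ilog2(~0xff & -(~0xff)) = __ilog2(0x100) = 8.
 */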

/*
 * ffs: find first bit set. This is defined the same way as
 * the libc and compiler builtin ffs routines, therefore
 * differs in spirit from the above ffz (man ffs).
 */
static __inline__ int ffs(int x)
{
	int result = ffz(~x);
	return x ? result+1 : 0;
}
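
/*
 * Examples: ffs(0) = 0, ffs(1) = 1, ffs(0x8000) = 16.  Unlike ffz()
 * above, the result is 1-based.
 */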

/*
 * hweightN: returns the hamming weight (i.e. the number
 * of bits set) of an N-bit word
 */
#define hweight32(x) generic_hweight32(x)
#define hweight16(x) generic_hweight16(x)
#define hweight8(x) generic_hweight8(x)
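
/* Example: hweight8(0xa5) = 4, since 0xa5 is 10100101 in binary. */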

extern unsigned long find_next_zero_bit(void * addr, unsigned long size,
					unsigned long offset);
/*
 * The optimizer actually generates good code for this case.
 */
#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)

/* Bitmap functions for the ext2 filesystem. */
#define _EXT2_HAVE_ASM_BITOPS_

static __inline__ int ext2_set_bit(int nr, void* addr)
{
	/* This function needs to take into account that the ext2
	 * filesystem represents its bitmaps as "little endian"
	 * unsigned integers.
	 * Note: this function is not atomic, but ext2 does not need
	 * it to be.
	 */
	int mask;
	int oldbit;
	unsigned char* ADDR = (unsigned char*) addr;

	/* Determine the BYTE containing the specified bit (nr);
	 * important because once we operate on a single byte there
	 * are no little endian concerns.
	 */
	ADDR += nr >> 3;
	mask = 1 << (nr & 0x07);  /* Create a mask to the bit within this byte. */
	oldbit = *ADDR & mask;  /* Save the bit's previous value. */
	*ADDR |= mask;  /* Turn the bit on. */
	return oldbit;  /* Return the bit's previous value. */
}
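
/*
 * Worked example: ext2_set_bit(12, addr) selects byte addr[12 >> 3],
 * i.e. addr[1], and mask 1 << (12 & 7) = 0x10, regardless of host
 * byte order; working a byte at a time is what makes the layout
 * endian-neutral.
 */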

static __inline__ int ext2_clear_bit(int nr, void* addr)
{
	/* This function needs to take into account that the ext2
	 * filesystem represents its bitmaps as "little endian"
	 * unsigned integers.
	 * Note: this function is not atomic, but ext2 does not need
	 * it to be.
	 */
	int mask;
	int oldbit;
	unsigned char* ADDR = (unsigned char*) addr;

	/* Determine the BYTE containing the specified bit (nr);
	 * important because once we operate on a single byte there
	 * are no little endian concerns.
	 */
	ADDR += nr >> 3;
	mask = 1 << (nr & 0x07);  /* Create a mask to the bit within this byte. */
	oldbit = *ADDR & mask;  /* Save the bit's previous value. */
	*ADDR = *ADDR & ~mask;  /* Turn the bit off. */
	return oldbit;  /* Return the bit's previous value. */
}

static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
{
	/* This function needs to take into account that the ext2
	 * filesystem represents its bitmaps as "little endian"
	 * unsigned integers.  Determine the BYTE containing the
	 * specified bit (nr), then shift right to extract that bit's
	 * value.
	 */
	__const__ unsigned char	*ADDR = (__const__ unsigned char *) addr;
	return (ADDR[nr >> 3] >> (nr & 7)) & 1;
}

/* Returns the zero-based bit position of the most significant 1 bit
 * in a 32-bit word.
 */
static __inline__ int ext2_ilog2(unsigned int x)
{
	int lz;

	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
	return 31 - lz;
}

/* ext2_ffz = ext2's Find First Zero.
 * Returns the zero-based position (from the right, 0..31) of the
 * least significant 0 bit in a 32-bit word, or 32 if no zero exists.
 */
static __inline__ int ext2_ffz(unsigned int x)
{
	u32 tempRC;

	/* Complement x, and ensure at least one zero existed in the
	 * original word.
	 */
	if ((x = ~x) == 0)
		/* No zero exists anywhere in the 4 byte area. */
		return 32;

	/* Calculate the bit position of the least significant '1' bit in x
	 * (since x has been complemented this is the least significant
	 * '0' bit in the original x).
	 * Note: (x & -x) gives us a mask that is the LEAST significant
	 * (RIGHT-most) 1-bit of the value in x.
	 */
	tempRC = ext2_ilog2(x & -x);
	return tempRC;
}

static __inline__ u32 ext2_find_next_zero_bit(void* addr, u32 size, u32 offset)
{
	/* This function needs to take into account that the ext2
	 * filesystem represents its bitmaps as "little endian"
	 * unsigned integers.
	 */
	unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
	unsigned int result = offset & ~31;
	unsigned int tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset &= 31;
	if (offset) {
		tmp = cpu_to_le32p(p++);
		tmp |= ~0U >> (32-offset); /* bug or feature ? */
		if (size < 32)
			goto found_first;
		if (tmp != ~0U)
			goto found_middle;
		size -= 32;
		result += 32;
	}
	while (size >= 32) {
		if ((tmp = cpu_to_le32p(p++)) != ~0U)
			goto found_middle;
		result += 32;
		size -= 32;
	}
	if (!size)
		return result;
	tmp = cpu_to_le32p(p);
found_first:
	tmp |= ~0U << size;
	if (tmp == ~0U)		/* Are any bits zero? */
		return result + size; /* Nope. */
found_middle:
	return result + ext2_ffz(tmp);
}

#define ext2_find_first_zero_bit(addr, size) ext2_find_next_zero_bit((addr), (size), 0)
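
/*
 * Illustrative (hypothetical) use, scanning an ext2 block bitmap for
 * a free block; `bh' and `sb' stand in for real buffer_head and
 * super_block variables:
 *
 *	bit = ext2_find_first_zero_bit(bh->b_data,
 *				       EXT2_BLOCKS_PER_GROUP(sb));
 *	if (bit < EXT2_BLOCKS_PER_GROUP(sb))
 *		ext2_set_bit(bit, bh->b_data);
 */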

#endif /* __KERNEL__ */
#endif /* _PPC64_BITOPS_H */