///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
///
/// This file provides macros or functions to do some basic integer and bit
/// operations.
///
/// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
///   - Byte swapping: bswapXX(num)
///   - Byte order conversions to/from native: convXXYe(num)
///   - Aligned reads: readXXYe(ptr)
///   - Aligned writes: writeXXYe(ptr, num)
///   - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
///   - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
///
/// Since these can be implemented as macros, the arguments must not have
/// side effects: they may be evaluated more than once.
///
/// \todo       PowerPC and possibly some other architectures support
///             byte swapping load and store instructions. This file
///             doesn't take advantage of those instructions.
///
/// Bit scan operations for non-zero 32-bit integers:
///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
///   - Count leading zeros: clz32(num)
///   - Count trailing zeros: ctz32(num)
///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
///
/// The above bit scan operations return 0-31. If num is zero,
/// the result is undefined.
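///
/// Example usage (an illustrative sketch only, not part of this header):
///
///     uint8_t buf[4];
///     write32be(buf, 0x11223344);  // buf = { 0x11, 0x22, 0x33, 0x44 }
///     uint32_t v = read32be(buf);  // v = 0x11223344
///     uint32_t b = bsr32(v);       // b = 28 (the highest set bit)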
//
//  Authors:    Lasse Collin
//              Joachim Henke
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"


////////////////////////////////////////
// Operating system specific features //
////////////////////////////////////////

#if defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif


///////////////////
// Byte swapping //
///////////////////
#ifndef bswap16
	// The outer cast truncates away the high bits that integer promotion
	// of the left shift would otherwise leave in the result.
#	define bswap16(num) \
		((uint16_t)(((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8)))
#endif

#ifndef bswap32
#	define bswap32(num) \
		( (((uint32_t)(num) << 24)                       ) \
		| (((uint32_t)(num) <<  8) & UINT32_C(0x00FF0000)) \
		| (((uint32_t)(num) >>  8) & UINT32_C(0x0000FF00)) \
		| (((uint32_t)(num) >> 24)                       ) )
#endif

#ifndef bswap64
#	define bswap64(num) \
		( (((uint64_t)(num) << 56)                               ) \
		| (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
		| (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
		| (((uint64_t)(num) <<  8) & UINT64_C(0x000000FF00000000)) \
		| (((uint64_t)(num) >>  8) & UINT64_C(0x00000000FF000000)) \
		| (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
		| (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
		| (((uint64_t)(num) >> 56)                               ) )
#endif
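
// For example, with any of the above definitions,
// bswap32(UINT32_C(0x11223344)) evaluates to 0x44332211.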

// Define conversion macros using the basic byte swapping macros.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif
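
// For example, on a little endian system conv32be(0x11223344) evaluates
// to 0x44332211 while conv32le() is a no-op; on a big endian system the
// roles are reversed.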


//////////////////////////////
// Aligned reads and writes //
//////////////////////////////

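// NOTE: The buf pointer is assumed to be suitably aligned for the integer
// type being accessed; the unaligned_*() functions below handle the general
// case. The type-punned pointer casts also assume a compiler that is
// lenient about strict aliasing (a memcpy()-based version would be the
// strictly conforming alternative).
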
static inline uint16_t
read16be(const uint8_t *buf)
{
	uint16_t num = *(const uint16_t *)buf;
	return conv16be(num);
}


static inline uint16_t
read16le(const uint8_t *buf)
{
	uint16_t num = *(const uint16_t *)buf;
	return conv16le(num);
}


static inline uint32_t
read32be(const uint8_t *buf)
{
	uint32_t num = *(const uint32_t *)buf;
	return conv32be(num);
}


static inline uint32_t
read32le(const uint8_t *buf)
{
	uint32_t num = *(const uint32_t *)buf;
	return conv32le(num);
}


static inline uint64_t
read64be(const uint8_t *buf)
{
	uint64_t num = *(const uint64_t *)buf;
	return conv64be(num);
}


static inline uint64_t
read64le(const uint8_t *buf)
{
	uint64_t num = *(const uint64_t *)buf;
	return conv64le(num);
}


// NOTE: Possible byte swapping must be done in a macro to allow GCC
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible, similarly to the
// readXXYe() functions.

#define write16be(buf, num) write16ne((buf), conv16be(num))
#define write16le(buf, num) write16ne((buf), conv16le(num))
#define write32be(buf, num) write32ne((buf), conv32be(num))
#define write32le(buf, num) write32ne((buf), conv32le(num))
#define write64be(buf, num) write64ne((buf), conv64be(num))
#define write64le(buf, num) write64ne((buf), conv64le(num))
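
// For example, write32le(buf, UINT32_C(0x11223344)) stores the bytes
// { 0x44, 0x33, 0x22, 0x11 } at buf regardless of native endianness.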

static inline void
write16ne(uint8_t *buf, uint16_t num)
{
	*(uint16_t *)buf = num;
	return;
}


static inline void
write32ne(uint8_t *buf, uint32_t num)
{
	*(uint32_t *)buf = num;
	return;
}


static inline void
write64ne(uint8_t *buf, uint64_t num)
{
	*(uint64_t *)buf = num;
	return;
}


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
// 32-bit unaligned integer loads and stores. It's possible that 64-bit
// unaligned access doesn't work or is slower than byte-by-byte access.
// Since unaligned 64-bit is probably not needed as often as 16-bit or
// 32-bit, we simply don't support 64-bit unaligned access for now.
#ifdef TUKLIB_FAST_UNALIGNED_ACCESS
#	define unaligned_read16be read16be
#	define unaligned_read16le read16le
#	define unaligned_read32be read32be
#	define unaligned_read32le read32le
#	define unaligned_write16be write16be
#	define unaligned_write16le write16le
#	define unaligned_write32be write32be
#	define unaligned_write32le write32le

#else

static inline uint16_t
unaligned_read16be(const uint8_t *buf)
{
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
}


static inline uint16_t
unaligned_read16le(const uint8_t *buf)
{
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
}


static inline uint32_t
unaligned_read32be(const uint8_t *buf)
{
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
}


static inline uint32_t
unaligned_read32le(const uint8_t *buf)
{
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
}


static inline void
unaligned_write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
	return;
}


static inline void
unaligned_write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	return;
}


static inline void
unaligned_write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
	return;
}


static inline void
unaligned_write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	return;
}

#endif
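
// For example, given buf = { 0x11, 0x22, 0x33, 0x44 } at any address,
// unaligned_read32be(buf) returns 0x11223344 and unaligned_read32le(buf)
// returns 0x44332211, with either implementation above.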


static inline uint32_t
bsr32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n);

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
	// GCC >= 3.4 has __builtin_clz(), which gives good results on
	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
	// either plain BSR (so the XOR gets optimized away) or LZCNT and
	// XOR (if -march indicates that SSE4a instructions are supported).
	return __builtin_clz(n) ^ 31U;

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER) && _MSC_VER >= 1400
	// MSVC isn't supported by tuklib, but since this code exists,
	// it doesn't hurt to have it here anyway.
	uint32_t i;
	_BitScanReverse((DWORD *)&i, n);
	return i;

#else
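	// Generic fallback: a binary search that narrows down the position
	// of the highest set bit in 16, 8, 4, 2, and 1 bit steps.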
	uint32_t i = 31;

	if ((n & UINT32_C(0xFFFF0000)) == 0) {
		n <<= 16;
		i = 15;
	}

	if ((n & UINT32_C(0xFF000000)) == 0) {
		n <<= 8;
		i -= 8;
	}

	if ((n & UINT32_C(0xF0000000)) == 0) {
		n <<= 4;
		i -= 4;
	}

	if ((n & UINT32_C(0xC0000000)) == 0) {
		n <<= 2;
		i -= 2;
	}

	if ((n & UINT32_C(0x80000000)) == 0)
		--i;

	return i;
#endif
}
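
// For example, bsr32(1) == 0, bsr32(0x1000) == 12, and
// bsr32(UINT32_C(0x80000000)) == 31. bsr32(0) is undefined.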


static inline uint32_t
clz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n) ^ 31U;

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
	return __builtin_clz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0\n\t"
		"xorl $31, %0"
		: "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER) && _MSC_VER >= 1400
	uint32_t i;
	_BitScanReverse((DWORD *)&i, n);
	return i ^ 31U;

#else
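	// Generic fallback: shift the value left in 16, 8, 4, 2, and 1 bit
	// steps, counting the leading zeros skipped over.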
	uint32_t i = 0;

	if ((n & UINT32_C(0xFFFF0000)) == 0) {
		n <<= 16;
		i = 16;
	}

	if ((n & UINT32_C(0xFF000000)) == 0) {
		n <<= 8;
		i += 8;
	}

	if ((n & UINT32_C(0xF0000000)) == 0) {
		n <<= 4;
		i += 4;
	}

	if ((n & UINT32_C(0xC0000000)) == 0) {
		n <<= 2;
		i += 2;
	}

	if ((n & UINT32_C(0x80000000)) == 0)
		++i;

	return i;
#endif
}
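
// For example, clz32(1) == 31 and clz32(UINT32_C(0x80000000)) == 0.
// For non-zero n, clz32(n) == 31 - bsr32(n).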


static inline uint32_t
ctz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_forward(n);

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
	return __builtin_ctz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER) && _MSC_VER >= 1400
	uint32_t i;
	_BitScanForward((DWORD *)&i, n);
	return i;

#else
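	// Generic fallback: shift the value right in 16, 8, 4, 2, and 1 bit
	// steps, counting the trailing zeros skipped over.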
	uint32_t i = 0;

	if ((n & UINT32_C(0x0000FFFF)) == 0) {
		n >>= 16;
		i = 16;
	}

	if ((n & UINT32_C(0x000000FF)) == 0) {
		n >>= 8;
		i += 8;
	}

	if ((n & UINT32_C(0x0000000F)) == 0) {
		n >>= 4;
		i += 4;
	}

	if ((n & UINT32_C(0x00000003)) == 0) {
		n >>= 2;
		i += 2;
	}

	if ((n & UINT32_C(0x00000001)) == 0)
		++i;

	return i;
#endif
}
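
// For example, ctz32(1) == 0, ctz32(0x1000) == 12, and
// ctz32(UINT32_C(0x80000000)) == 31. ctz32(0) is undefined.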

#define bsf32 ctz32

#endif