1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       tuklib_integer.h
4/// \brief      Various integer and bit operations
5///
6/// This file provides macros or functions to do some basic integer and bit
7/// operations.
8///
9/// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
10///   - Byte swapping: bswapXX(num)
11///   - Byte order conversions to/from native: convXXYe(num)
12///   - Aligned reads: readXXYe(ptr)
13///   - Aligned writes: writeXXYe(ptr, num)
14///   - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
15///   - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
16///
17/// Since they can macros, the arguments should have no side effects since
18/// they may be evaluated more than once.
19///
20/// \todo       PowerPC and possibly some other architectures support
21///             byte swapping load and store instructions. This file
22///             doesn't take advantage of those instructions.
23///
24/// Bit scan operations for non-zero 32-bit integers:
25///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
26///   - Count leading zeros: clz32(num)
27///   - Count trailing zeros: ctz32(num)
28///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
29///
30/// The above bit scan operations return 0-31. If num is zero,
31/// the result is undefined.
32//
33//  Authors:    Lasse Collin
34//              Joachim Henke
35//
36//  This file has been put into the public domain.
37//  You can do whatever you want with this file.
38//
39///////////////////////////////////////////////////////////////////////////////
40
41#ifndef TUKLIB_INTEGER_H
42#define TUKLIB_INTEGER_H
43
44#include "tuklib_common.h"
45
46
47////////////////////////////////////////
48// Operating system specific features //
49////////////////////////////////////////
50
51#if defined(HAVE_BYTESWAP_H)
52	// glibc, uClibc, dietlibc
53#	include <byteswap.h>
54#	ifdef HAVE_BSWAP_16
55#		define bswap16(num) bswap_16(num)
56#	endif
57#	ifdef HAVE_BSWAP_32
58#		define bswap32(num) bswap_32(num)
59#	endif
60#	ifdef HAVE_BSWAP_64
61#		define bswap64(num) bswap_64(num)
62#	endif
63
64#elif defined(HAVE_SYS_ENDIAN_H)
65	// *BSDs and Darwin
66#	include <sys/endian.h>
67
68#elif defined(HAVE_SYS_BYTEORDER_H)
69	// Solaris
70#	include <sys/byteorder.h>
71#	ifdef BSWAP_16
72#		define bswap16(num) BSWAP_16(num)
73#	endif
74#	ifdef BSWAP_32
75#		define bswap32(num) BSWAP_32(num)
76#	endif
77#	ifdef BSWAP_64
78#		define bswap64(num) BSWAP_64(num)
79#	endif
80#	ifdef BE_16
81#		define conv16be(num) BE_16(num)
82#	endif
83#	ifdef BE_32
84#		define conv32be(num) BE_32(num)
85#	endif
86#	ifdef BE_64
87#		define conv64be(num) BE_64(num)
88#	endif
89#	ifdef LE_16
90#		define conv16le(num) LE_16(num)
91#	endif
92#	ifdef LE_32
93#		define conv32le(num) LE_32(num)
94#	endif
95#	ifdef LE_64
96#		define conv64le(num) LE_64(num)
97#	endif
98#endif
99
100
101////////////////////////////////
102// Compiler-specific features //
103////////////////////////////////
104
105// Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
106// and such functions.
107#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
108#	include <immintrin.h>
109#endif
110
111
112///////////////////
113// Byte swapping //
114///////////////////
115
116#ifndef bswap16
117#	define bswap16(num) \
118		(((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8))
119#endif
120
121#ifndef bswap32
122#	define bswap32(num) \
123		( (((uint32_t)(num) << 24)                       ) \
124		| (((uint32_t)(num) <<  8) & UINT32_C(0x00FF0000)) \
125		| (((uint32_t)(num) >>  8) & UINT32_C(0x0000FF00)) \
126		| (((uint32_t)(num) >> 24)                       ) )
127#endif
128
129#ifndef bswap64
130#	define bswap64(num) \
131		( (((uint64_t)(num) << 56)                               ) \
132		| (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
133		| (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
134		| (((uint64_t)(num) <<  8) & UINT64_C(0x000000FF00000000)) \
135		| (((uint64_t)(num) >>  8) & UINT64_C(0x00000000FF000000)) \
136		| (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
137		| (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
138		| (((uint64_t)(num) >> 56)                               ) )
139#endif
140
141// Define conversion macros using the basic byte swapping macros.
142#ifdef WORDS_BIGENDIAN
143#	ifndef conv16be
144#		define conv16be(num) ((uint16_t)(num))
145#	endif
146#	ifndef conv32be
147#		define conv32be(num) ((uint32_t)(num))
148#	endif
149#	ifndef conv64be
150#		define conv64be(num) ((uint64_t)(num))
151#	endif
152#	ifndef conv16le
153#		define conv16le(num) bswap16(num)
154#	endif
155#	ifndef conv32le
156#		define conv32le(num) bswap32(num)
157#	endif
158#	ifndef conv64le
159#		define conv64le(num) bswap64(num)
160#	endif
161#else
162#	ifndef conv16be
163#		define conv16be(num) bswap16(num)
164#	endif
165#	ifndef conv32be
166#		define conv32be(num) bswap32(num)
167#	endif
168#	ifndef conv64be
169#		define conv64be(num) bswap64(num)
170#	endif
171#	ifndef conv16le
172#		define conv16le(num) ((uint16_t)(num))
173#	endif
174#	ifndef conv32le
175#		define conv32le(num) ((uint32_t)(num))
176#	endif
177#	ifndef conv64le
178#		define conv64le(num) ((uint64_t)(num))
179#	endif
180#endif
181
182
183//////////////////////////////
184// Aligned reads and writes //
185//////////////////////////////
186
187static inline uint16_t
188read16be(const uint8_t *buf)
189{
190	uint16_t num = *(const uint16_t *)buf;
191	return conv16be(num);
192}
193
194
195static inline uint16_t
196read16le(const uint8_t *buf)
197{
198	uint16_t num = *(const uint16_t *)buf;
199	return conv16le(num);
200}
201
202
203static inline uint32_t
204read32be(const uint8_t *buf)
205{
206	uint32_t num = *(const uint32_t *)buf;
207	return conv32be(num);
208}
209
210
211static inline uint32_t
212read32le(const uint8_t *buf)
213{
214	uint32_t num = *(const uint32_t *)buf;
215	return conv32le(num);
216}
217
218
219static inline uint64_t
220read64be(const uint8_t *buf)
221{
222	uint64_t num = *(const uint64_t *)buf;
223	return conv64be(num);
224}
225
226
227static inline uint64_t
228read64le(const uint8_t *buf)
229{
230	uint64_t num = *(const uint64_t *)buf;
231	return conv64le(num);
232}
233
234
235// NOTE: Possible byte swapping must be done in a macro to allow GCC
236// to optimize byte swapping of constants when using glibc's or *BSD's
237// byte swapping macros. The actual write is done in an inline function
238// to make type checking of the buf pointer possible similarly to readXXYe()
239// functions.
240
241#define write16be(buf, num) write16ne((buf), conv16be(num))
242#define write16le(buf, num) write16ne((buf), conv16le(num))
243#define write32be(buf, num) write32ne((buf), conv32be(num))
244#define write32le(buf, num) write32ne((buf), conv32le(num))
245#define write64be(buf, num) write64ne((buf), conv64be(num))
246#define write64le(buf, num) write64ne((buf), conv64le(num))
247
248
249static inline void
250write16ne(uint8_t *buf, uint16_t num)
251{
252	*(uint16_t *)buf = num;
253	return;
254}
255
256
257static inline void
258write32ne(uint8_t *buf, uint32_t num)
259{
260	*(uint32_t *)buf = num;
261	return;
262}
263
264
265static inline void
266write64ne(uint8_t *buf, uint64_t num)
267{
268	*(uint64_t *)buf = num;
269	return;
270}
271
272
273////////////////////////////////
274// Unaligned reads and writes //
275////////////////////////////////
276
277// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
278// 32-bit unaligned integer loads and stores. It's possible that 64-bit
279// unaligned access doesn't work or is slower than byte-by-byte access.
280// Since unaligned 64-bit is probably not needed as often as 16-bit or
281// 32-bit, we simply don't support 64-bit unaligned access for now.
282#ifdef TUKLIB_FAST_UNALIGNED_ACCESS
283#	define unaligned_read16be read16be
284#	define unaligned_read16le read16le
285#	define unaligned_read32be read32be
286#	define unaligned_read32le read32le
287#	define unaligned_write16be write16be
288#	define unaligned_write16le write16le
289#	define unaligned_write32be write32be
290#	define unaligned_write32le write32le
291
292#else
293
294static inline uint16_t
295unaligned_read16be(const uint8_t *buf)
296{
297	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
298	return num;
299}
300
301
302static inline uint16_t
303unaligned_read16le(const uint8_t *buf)
304{
305	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
306	return num;
307}
308
309
310static inline uint32_t
311unaligned_read32be(const uint8_t *buf)
312{
313	uint32_t num = (uint32_t)buf[0] << 24;
314	num |= (uint32_t)buf[1] << 16;
315	num |= (uint32_t)buf[2] << 8;
316	num |= (uint32_t)buf[3];
317	return num;
318}
319
320
321static inline uint32_t
322unaligned_read32le(const uint8_t *buf)
323{
324	uint32_t num = (uint32_t)buf[0];
325	num |= (uint32_t)buf[1] << 8;
326	num |= (uint32_t)buf[2] << 16;
327	num |= (uint32_t)buf[3] << 24;
328	return num;
329}
330
331
332static inline void
333unaligned_write16be(uint8_t *buf, uint16_t num)
334{
335	buf[0] = (uint8_t)(num >> 8);
336	buf[1] = (uint8_t)num;
337	return;
338}
339
340
341static inline void
342unaligned_write16le(uint8_t *buf, uint16_t num)
343{
344	buf[0] = (uint8_t)num;
345	buf[1] = (uint8_t)(num >> 8);
346	return;
347}
348
349
350static inline void
351unaligned_write32be(uint8_t *buf, uint32_t num)
352{
353	buf[0] = (uint8_t)(num >> 24);
354	buf[1] = (uint8_t)(num >> 16);
355	buf[2] = (uint8_t)(num >> 8);
356	buf[3] = (uint8_t)num;
357	return;
358}
359
360
361static inline void
362unaligned_write32le(uint8_t *buf, uint32_t num)
363{
364	buf[0] = (uint8_t)num;
365	buf[1] = (uint8_t)(num >> 8);
366	buf[2] = (uint8_t)(num >> 16);
367	buf[3] = (uint8_t)(num >> 24);
368	return;
369}
370
371#endif
372
373
374static inline uint32_t
375bsr32(uint32_t n)
376{
377	// Check for ICC first, since it tends to define __GNUC__ too.
378#if defined(__INTEL_COMPILER)
379	return _bit_scan_reverse(n);
380
381#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
382	// GCC >= 3.4 has __builtin_clz(), which gives good results on
383	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
384	// either plain BSR (so the XOR gets optimized away) or LZCNT and
385	// XOR (if -march indicates that SSE4a instructions are supported).
386	return __builtin_clz(n) ^ 31U;
387
388#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
389	uint32_t i;
390	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
391	return i;
392
393#elif defined(_MSC_VER) && _MSC_VER >= 1400
394	// MSVC isn't supported by tuklib, but since this code exists,
395	// it doesn't hurt to have it here anyway.
396	uint32_t i;
397	_BitScanReverse((DWORD *)&i, n);
398	return i;
399
400#else
401	uint32_t i = 31;
402
403	if ((n & UINT32_C(0xFFFF0000)) == 0) {
404		n <<= 16;
405		i = 15;
406	}
407
408	if ((n & UINT32_C(0xFF000000)) == 0) {
409		n <<= 8;
410		i -= 8;
411	}
412
413	if ((n & UINT32_C(0xF0000000)) == 0) {
414		n <<= 4;
415		i -= 4;
416	}
417
418	if ((n & UINT32_C(0xC0000000)) == 0) {
419		n <<= 2;
420		i -= 2;
421	}
422
423	if ((n & UINT32_C(0x80000000)) == 0)
424		--i;
425
426	return i;
427#endif
428}
429
430
431static inline uint32_t
432clz32(uint32_t n)
433{
434#if defined(__INTEL_COMPILER)
435	return _bit_scan_reverse(n) ^ 31U;
436
437#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
438	return __builtin_clz(n);
439
440#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
441	uint32_t i;
442	__asm__("bsrl %1, %0\n\t"
443		"xorl $31, %0"
444		: "=r" (i) : "rm" (n));
445	return i;
446
447#elif defined(_MSC_VER) && _MSC_VER >= 1400
448	uint32_t i;
449	_BitScanReverse((DWORD *)&i, n);
450	return i ^ 31U;
451
452#else
453	uint32_t i = 0;
454
455	if ((n & UINT32_C(0xFFFF0000)) == 0) {
456		n <<= 16;
457		i = 16;
458	}
459
460	if ((n & UINT32_C(0xFF000000)) == 0) {
461		n <<= 8;
462		i += 8;
463	}
464
465	if ((n & UINT32_C(0xF0000000)) == 0) {
466		n <<= 4;
467		i += 4;
468	}
469
470	if ((n & UINT32_C(0xC0000000)) == 0) {
471		n <<= 2;
472		i += 2;
473	}
474
475	if ((n & UINT32_C(0x80000000)) == 0)
476		++i;
477
478	return i;
479#endif
480}
481
482
483static inline uint32_t
484ctz32(uint32_t n)
485{
486#if defined(__INTEL_COMPILER)
487	return _bit_scan_forward(n);
488
489#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
490	return __builtin_ctz(n);
491
492#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
493	uint32_t i;
494	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
495	return i;
496
497#elif defined(_MSC_VER) && _MSC_VER >= 1400
498	uint32_t i;
499	_BitScanForward((DWORD *)&i, n);
500	return i;
501
502#else
503	uint32_t i = 0;
504
505	if ((n & UINT32_C(0x0000FFFF)) == 0) {
506		n >>= 16;
507		i = 16;
508	}
509
510	if ((n & UINT32_C(0x000000FF)) == 0) {
511		n >>= 8;
512		i += 8;
513	}
514
515	if ((n & UINT32_C(0x0000000F)) == 0) {
516		n >>= 4;
517		i += 4;
518	}
519
520	if ((n & UINT32_C(0x00000003)) == 0) {
521		n >>= 2;
522		i += 2;
523	}
524
525	if ((n & UINT32_C(0x00000001)) == 0)
526		++i;
527
528	return i;
529#endif
530}
531
532#define bsf32 ctz32
533
534#endif
535