1/////////////////////////////////////////////////////////////////////////////// 2// 3/// \file tuklib_integer.h 4/// \brief Various integer and bit operations 5/// 6/// This file provides macros or functions to do some basic integer and bit 7/// operations. 8/// 9/// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l): 10/// - Byte swapping: bswapXX(num) 11/// - Byte order conversions to/from native: convXXYe(num) 12/// - Aligned reads: readXXYe(ptr) 13/// - Aligned writes: writeXXYe(ptr, num) 14/// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr) 15/// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num) 16/// 17/// Since they can macros, the arguments should have no side effects since 18/// they may be evaluated more than once. 19/// 20/// \todo PowerPC and possibly some other architectures support 21/// byte swapping load and store instructions. This file 22/// doesn't take advantage of those instructions. 23/// 24/// Bit scan operations for non-zero 32-bit integers: 25/// - Bit scan reverse (find highest non-zero bit): bsr32(num) 26/// - Count leading zeros: clz32(num) 27/// - Count trailing zeros: ctz32(num) 28/// - Bit scan forward (simply an alias for ctz32()): bsf32(num) 29/// 30/// The above bit scan operations return 0-31. If num is zero, 31/// the result is undefined. 32// 33// Authors: Lasse Collin 34// Joachim Henke 35// 36// This file has been put into the public domain. 37// You can do whatever you want with this file. 38// 39/////////////////////////////////////////////////////////////////////////////// 40 41#ifndef TUKLIB_INTEGER_H 42#define TUKLIB_INTEGER_H 43 44#include "tuklib_common.h" 45 46 47//////////////////////////////////////// 48// Operating system specific features // 49//////////////////////////////////////// 50 51#if defined(HAVE_BYTESWAP_H) 52 // glibc, uClibc, dietlibc 53# include <byteswap.h> 54# ifdef HAVE_BSWAP_16 55# define bswap16(num) bswap_16(num) 56# endif 57# ifdef HAVE_BSWAP_32 58# define bswap32(num) bswap_32(num) 59# endif 60# ifdef HAVE_BSWAP_64 61# define bswap64(num) bswap_64(num) 62# endif 63 64#elif defined(HAVE_SYS_ENDIAN_H) 65 // *BSDs and Darwin 66# include <sys/endian.h> 67 68#elif defined(HAVE_SYS_BYTEORDER_H) 69 // Solaris 70# include <sys/byteorder.h> 71# ifdef BSWAP_16 72# define bswap16(num) BSWAP_16(num) 73# endif 74# ifdef BSWAP_32 75# define bswap32(num) BSWAP_32(num) 76# endif 77# ifdef BSWAP_64 78# define bswap64(num) BSWAP_64(num) 79# endif 80# ifdef BE_16 81# define conv16be(num) BE_16(num) 82# endif 83# ifdef BE_32 84# define conv32be(num) BE_32(num) 85# endif 86# ifdef BE_64 87# define conv64be(num) BE_64(num) 88# endif 89# ifdef LE_16 90# define conv16le(num) LE_16(num) 91# endif 92# ifdef LE_32 93# define conv32le(num) LE_32(num) 94# endif 95# ifdef LE_64 96# define conv64le(num) LE_64(num) 97# endif 98#endif 99 100 101//////////////////////////////// 102// Compiler-specific features // 103//////////////////////////////// 104 105// Newer Intel C compilers require immintrin.h for _bit_scan_reverse() 106// and such functions. 107#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500) 108# include <immintrin.h> 109#endif 110 111 112/////////////////// 113// Byte swapping // 114/////////////////// 115 116#ifndef bswap16 117# define bswap16(num) \ 118 (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8)) 119#endif 120 121#ifndef bswap32 122# define bswap32(num) \ 123 ( (((uint32_t)(num) << 24) ) \ 124 | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \ 125 | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \ 126 | (((uint32_t)(num) >> 24) ) ) 127#endif 128 129#ifndef bswap64 130# define bswap64(num) \ 131 ( (((uint64_t)(num) << 56) ) \ 132 | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \ 133 | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \ 134 | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \ 135 | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \ 136 | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \ 137 | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \ 138 | (((uint64_t)(num) >> 56) ) ) 139#endif 140 141// Define conversion macros using the basic byte swapping macros. 142#ifdef WORDS_BIGENDIAN 143# ifndef conv16be 144# define conv16be(num) ((uint16_t)(num)) 145# endif 146# ifndef conv32be 147# define conv32be(num) ((uint32_t)(num)) 148# endif 149# ifndef conv64be 150# define conv64be(num) ((uint64_t)(num)) 151# endif 152# ifndef conv16le 153# define conv16le(num) bswap16(num) 154# endif 155# ifndef conv32le 156# define conv32le(num) bswap32(num) 157# endif 158# ifndef conv64le 159# define conv64le(num) bswap64(num) 160# endif 161#else 162# ifndef conv16be 163# define conv16be(num) bswap16(num) 164# endif 165# ifndef conv32be 166# define conv32be(num) bswap32(num) 167# endif 168# ifndef conv64be 169# define conv64be(num) bswap64(num) 170# endif 171# ifndef conv16le 172# define conv16le(num) ((uint16_t)(num)) 173# endif 174# ifndef conv32le 175# define conv32le(num) ((uint32_t)(num)) 176# endif 177# ifndef conv64le 178# define conv64le(num) ((uint64_t)(num)) 179# endif 180#endif 181 182 183////////////////////////////// 184// Aligned reads and writes // 185////////////////////////////// 186 187static inline uint16_t 188read16be(const uint8_t *buf) 189{ 190 uint16_t num = *(const uint16_t *)buf; 191 return conv16be(num); 192} 193 194 195static inline uint16_t 196read16le(const uint8_t *buf) 197{ 198 uint16_t num = *(const uint16_t *)buf; 199 return conv16le(num); 200} 201 202 203static inline uint32_t 204read32be(const uint8_t *buf) 205{ 206 uint32_t num = *(const uint32_t *)buf; 207 return conv32be(num); 208} 209 210 211static inline uint32_t 212read32le(const uint8_t *buf) 213{ 214 uint32_t num = *(const uint32_t *)buf; 215 return conv32le(num); 216} 217 218 219static inline uint64_t 220read64be(const uint8_t *buf) 221{ 222 uint64_t num = *(const uint64_t *)buf; 223 return conv64be(num); 224} 225 226 227static inline uint64_t 228read64le(const uint8_t *buf) 229{ 230 uint64_t num = *(const uint64_t *)buf; 231 return conv64le(num); 232} 233 234 235// NOTE: Possible byte swapping must be done in a macro to allow GCC 236// to optimize byte swapping of constants when using glibc's or *BSD's 237// byte swapping macros. The actual write is done in an inline function 238// to make type checking of the buf pointer possible similarly to readXXYe() 239// functions. 240 241#define write16be(buf, num) write16ne((buf), conv16be(num)) 242#define write16le(buf, num) write16ne((buf), conv16le(num)) 243#define write32be(buf, num) write32ne((buf), conv32be(num)) 244#define write32le(buf, num) write32ne((buf), conv32le(num)) 245#define write64be(buf, num) write64ne((buf), conv64be(num)) 246#define write64le(buf, num) write64ne((buf), conv64le(num)) 247 248 249static inline void 250write16ne(uint8_t *buf, uint16_t num) 251{ 252 *(uint16_t *)buf = num; 253 return; 254} 255 256 257static inline void 258write32ne(uint8_t *buf, uint32_t num) 259{ 260 *(uint32_t *)buf = num; 261 return; 262} 263 264 265static inline void 266write64ne(uint8_t *buf, uint64_t num) 267{ 268 *(uint64_t *)buf = num; 269 return; 270} 271 272 273//////////////////////////////// 274// Unaligned reads and writes // 275//////////////////////////////// 276 277// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and 278// 32-bit unaligned integer loads and stores. It's possible that 64-bit 279// unaligned access doesn't work or is slower than byte-by-byte access. 280// Since unaligned 64-bit is probably not needed as often as 16-bit or 281// 32-bit, we simply don't support 64-bit unaligned access for now. 282#ifdef TUKLIB_FAST_UNALIGNED_ACCESS 283# define unaligned_read16be read16be 284# define unaligned_read16le read16le 285# define unaligned_read32be read32be 286# define unaligned_read32le read32le 287# define unaligned_write16be write16be 288# define unaligned_write16le write16le 289# define unaligned_write32be write32be 290# define unaligned_write32le write32le 291 292#else 293 294static inline uint16_t 295unaligned_read16be(const uint8_t *buf) 296{ 297 uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1]; 298 return num; 299} 300 301 302static inline uint16_t 303unaligned_read16le(const uint8_t *buf) 304{ 305 uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8); 306 return num; 307} 308 309 310static inline uint32_t 311unaligned_read32be(const uint8_t *buf) 312{ 313 uint32_t num = (uint32_t)buf[0] << 24; 314 num |= (uint32_t)buf[1] << 16; 315 num |= (uint32_t)buf[2] << 8; 316 num |= (uint32_t)buf[3]; 317 return num; 318} 319 320 321static inline uint32_t 322unaligned_read32le(const uint8_t *buf) 323{ 324 uint32_t num = (uint32_t)buf[0]; 325 num |= (uint32_t)buf[1] << 8; 326 num |= (uint32_t)buf[2] << 16; 327 num |= (uint32_t)buf[3] << 24; 328 return num; 329} 330 331 332static inline void 333unaligned_write16be(uint8_t *buf, uint16_t num) 334{ 335 buf[0] = (uint8_t)(num >> 8); 336 buf[1] = (uint8_t)num; 337 return; 338} 339 340 341static inline void 342unaligned_write16le(uint8_t *buf, uint16_t num) 343{ 344 buf[0] = (uint8_t)num; 345 buf[1] = (uint8_t)(num >> 8); 346 return; 347} 348 349 350static inline void 351unaligned_write32be(uint8_t *buf, uint32_t num) 352{ 353 buf[0] = (uint8_t)(num >> 24); 354 buf[1] = (uint8_t)(num >> 16); 355 buf[2] = (uint8_t)(num >> 8); 356 buf[3] = (uint8_t)num; 357 return; 358} 359 360 361static inline void 362unaligned_write32le(uint8_t *buf, uint32_t num) 363{ 364 buf[0] = (uint8_t)num; 365 buf[1] = (uint8_t)(num >> 8); 366 buf[2] = (uint8_t)(num >> 16); 367 buf[3] = (uint8_t)(num >> 24); 368 return; 369} 370 371#endif 372 373 374static inline uint32_t 375bsr32(uint32_t n) 376{ 377 // Check for ICC first, since it tends to define __GNUC__ too. 378#if defined(__INTEL_COMPILER) 379 return _bit_scan_reverse(n); 380 381#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX 382 // GCC >= 3.4 has __builtin_clz(), which gives good results on 383 // multiple architectures. On x86, __builtin_clz() ^ 31U becomes 384 // either plain BSR (so the XOR gets optimized away) or LZCNT and 385 // XOR (if -march indicates that SSE4a instructions are supported). 386 return __builtin_clz(n) ^ 31U; 387 388#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 389 uint32_t i; 390 __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n)); 391 return i; 392 393#elif defined(_MSC_VER) && _MSC_VER >= 1400 394 // MSVC isn't supported by tuklib, but since this code exists, 395 // it doesn't hurt to have it here anyway. 396 uint32_t i; 397 _BitScanReverse((DWORD *)&i, n); 398 return i; 399 400#else 401 uint32_t i = 31; 402 403 if ((n & UINT32_C(0xFFFF0000)) == 0) { 404 n <<= 16; 405 i = 15; 406 } 407 408 if ((n & UINT32_C(0xFF000000)) == 0) { 409 n <<= 8; 410 i -= 8; 411 } 412 413 if ((n & UINT32_C(0xF0000000)) == 0) { 414 n <<= 4; 415 i -= 4; 416 } 417 418 if ((n & UINT32_C(0xC0000000)) == 0) { 419 n <<= 2; 420 i -= 2; 421 } 422 423 if ((n & UINT32_C(0x80000000)) == 0) 424 --i; 425 426 return i; 427#endif 428} 429 430 431static inline uint32_t 432clz32(uint32_t n) 433{ 434#if defined(__INTEL_COMPILER) 435 return _bit_scan_reverse(n) ^ 31U; 436 437#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX 438 return __builtin_clz(n); 439 440#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 441 uint32_t i; 442 __asm__("bsrl %1, %0\n\t" 443 "xorl $31, %0" 444 : "=r" (i) : "rm" (n)); 445 return i; 446 447#elif defined(_MSC_VER) && _MSC_VER >= 1400 448 uint32_t i; 449 _BitScanReverse((DWORD *)&i, n); 450 return i ^ 31U; 451 452#else 453 uint32_t i = 0; 454 455 if ((n & UINT32_C(0xFFFF0000)) == 0) { 456 n <<= 16; 457 i = 16; 458 } 459 460 if ((n & UINT32_C(0xFF000000)) == 0) { 461 n <<= 8; 462 i += 8; 463 } 464 465 if ((n & UINT32_C(0xF0000000)) == 0) { 466 n <<= 4; 467 i += 4; 468 } 469 470 if ((n & UINT32_C(0xC0000000)) == 0) { 471 n <<= 2; 472 i += 2; 473 } 474 475 if ((n & UINT32_C(0x80000000)) == 0) 476 ++i; 477 478 return i; 479#endif 480} 481 482 483static inline uint32_t 484ctz32(uint32_t n) 485{ 486#if defined(__INTEL_COMPILER) 487 return _bit_scan_forward(n); 488 489#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX 490 return __builtin_ctz(n); 491 492#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 493 uint32_t i; 494 __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); 495 return i; 496 497#elif defined(_MSC_VER) && _MSC_VER >= 1400 498 uint32_t i; 499 _BitScanForward((DWORD *)&i, n); 500 return i; 501 502#else 503 uint32_t i = 0; 504 505 if ((n & UINT32_C(0x0000FFFF)) == 0) { 506 n >>= 16; 507 i = 16; 508 } 509 510 if ((n & UINT32_C(0x000000FF)) == 0) { 511 n >>= 8; 512 i += 8; 513 } 514 515 if ((n & UINT32_C(0x0000000F)) == 0) { 516 n >>= 4; 517 i += 4; 518 } 519 520 if ((n & UINT32_C(0x00000003)) == 0) { 521 n >>= 2; 522 i += 2; 523 } 524 525 if ((n & UINT32_C(0x00000001)) == 0) 526 ++i; 527 528 return i; 529#endif 530} 531 532#define bsf32 ctz32 533 534#endif 535