///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
///
/// This file provides macros or functions to do some basic integer and bit
/// operations.
///
/// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
///   - Byte swapping: bswapXX(num)
///   - Byte order conversions to/from native: convXXYe(num)
///   - Aligned reads: readXXYe(ptr)
///   - Aligned writes: writeXXYe(ptr, num)
///   - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
///   - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
///
/// Since they can be macros, the arguments should have no side effects
/// because they may be evaluated more than once.
///
/// \todo       PowerPC and possibly some other architectures support
///             byte swapping load and store instructions. This file
///             doesn't take advantage of those instructions.
///
/// Bit scan operations for non-zero 32-bit integers:
///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
///   - Count leading zeros: clz32(num)
///   - Count trailing zeros: ctz32(num)
///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
///
/// The above bit scan operations return 0-31. If num is zero,
/// the result is undefined.
//
//  Authors:    Lasse Collin
//              Joachim Henke
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"


////////////////////////////////////////
// Operating system specific features //
////////////////////////////////////////

// Pick up OS-provided byte swapping macros when the build system detected
// them. Anything not defined here gets a generic fallback definition later
// in this file (each fallback is guarded with #ifndef).

#if defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
	// sys/endian.h provides bswap16/32/64 directly under these names.
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
	// Besides byte swapping, Solaris also provides ready-made
	// to-big-endian/to-little-endian conversion macros.
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif


///////////////////
// Byte swapping //
///////////////////

// Generic fallbacks for platforms where no OS-provided byte swapping
// macros were found above. NOTE: The macro arguments may be evaluated
// more than once, so they must be free of side effects.
//
// The outer (uint16_t) cast in bswap16() is important: the uint16_t
// operands are promoted to int before shifting, so without truncating
// the result back to 16 bits the expression could carry garbage above
// bit 15 when used in a wider context (e.g. assigned to a uint32_t).
// bswap32() and bswap64() don't need this because their masks and shifts
// already confine every term to the correct bits of the unsigned type.
#ifndef bswap16
#	define bswap16(num) \
		((uint16_t)(((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8)))
#endif

#ifndef bswap32
#	define bswap32(num) \
		( (((uint32_t)(num) << 24)                       ) \
		| (((uint32_t)(num) <<  8) & UINT32_C(0x00FF0000)) \
		| (((uint32_t)(num) >>  8) & UINT32_C(0x0000FF00)) \
		| (((uint32_t)(num) >> 24)                       ) )
#endif

#ifndef bswap64
#	define bswap64(num) \
		( (((uint64_t)(num) << 56)                                 ) \
		| (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
		| (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
		| (((uint64_t)(num) <<  8) & UINT64_C(0x000000FF00000000)) \
		| (((uint64_t)(num) >>  8) & UINT64_C(0x00000000FF000000)) \
		| (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
		| (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
		| (((uint64_t)(num) >> 56)                                 ) )
#endif

// Define conversion macros using the basic byte swapping macros.
// convXXbe(num) converts between native byte order and big endian;
// convXXle(num) between native and little endian. On the matching-endian
// side each macro is a plain cast (no-op); on the other side it byte swaps.
// The #ifndef guards keep any OS-provided versions (e.g. Solaris BE_16)
// defined earlier in this file.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif


//////////////////////////////
// Aligned reads and writes //
//////////////////////////////

// Each readXXYe() does a direct load through a cast pointer, so buf must
// be suitably aligned for the integer type being read. The value is read
// into a local first so that the convXXYe() macro argument is a plain
// variable even when the macro evaluates its argument more than once.
// NOTE(review): the cast load is also a type-punned access; presumably
// the project builds with settings that make this safe (e.g. no strict
// aliasing issues on the supported compilers) — confirm with build flags.

// Read a big endian 16-bit integer from an aligned buffer.
static inline uint16_t
read16be(const uint8_t *buf)
{
	uint16_t num = *(const uint16_t *)buf;
	return conv16be(num);
}


// Read a little endian 16-bit integer from an aligned buffer.
static inline uint16_t
read16le(const uint8_t *buf)
{
	uint16_t num = *(const uint16_t *)buf;
	return conv16le(num);
}


// Read a big endian 32-bit integer from an aligned buffer.
static inline uint32_t
read32be(const uint8_t *buf)
{
	uint32_t num = *(const uint32_t *)buf;
	return conv32be(num);
}


// Read a little endian 32-bit integer from an aligned buffer.
static inline uint32_t
read32le(const uint8_t *buf)
{
	uint32_t num = *(const uint32_t *)buf;
	return conv32le(num);
}


// Read a big endian 64-bit integer from an aligned buffer.
static inline uint64_t
read64be(const uint8_t *buf)
{
	uint64_t num = *(const uint64_t *)buf;
	return conv64be(num);
}


// Read a little endian 64-bit integer from an aligned buffer.
static inline uint64_t
read64le(const uint8_t *buf)
{
	uint64_t num = *(const uint64_t *)buf;
	return conv64le(num);
}


// NOTE: Possible byte swapping must be done in a macro to allow GCC
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible similarly to readXXYe()
// functions.
// The endianness conversion happens in these macros rather than inside the
// store functions so that compilers can fold convXXYe() of constants at
// compile time (important with glibc's and the *BSDs' swap macros); the
// inline writeXXne() helpers then perform the native-endian store and give
// the buf argument proper type checking, mirroring the readXXYe() design.
#define write16be(buf, num) write16ne((buf), conv16be(num))
#define write16le(buf, num) write16ne((buf), conv16le(num))
#define write32be(buf, num) write32ne((buf), conv32be(num))
#define write32le(buf, num) write32ne((buf), conv32le(num))
#define write64be(buf, num) write64ne((buf), conv64be(num))
#define write64le(buf, num) write64ne((buf), conv64le(num))


// Store a 16-bit integer in native byte order. buf must be aligned
// suitably for uint16_t.
static inline void
write16ne(uint8_t *buf, uint16_t num)
{
	uint16_t *dest = (uint16_t *)buf;
	*dest = num;
}


// Store a 32-bit integer in native byte order. buf must be aligned
// suitably for uint32_t.
static inline void
write32ne(uint8_t *buf, uint32_t num)
{
	uint32_t *dest = (uint32_t *)buf;
	*dest = num;
}


// Store a 64-bit integer in native byte order. buf must be aligned
// suitably for uint64_t.
static inline void
write64ne(uint8_t *buf, uint64_t num)
{
	uint64_t *dest = (uint64_t *)buf;
	*dest = num;
}


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
// 32-bit unaligned integer loads and stores. It's possible that 64-bit
// unaligned access doesn't work or is slower than byte-by-byte access.
// Since unaligned 64-bit is probably not needed as often as 16-bit or
// 32-bit, we simply don't support 64-bit unaligned access for now.
#ifdef TUKLIB_FAST_UNALIGNED_ACCESS
// Unaligned 16/32-bit access is fast on this target: the aligned
// functions above already compile to plain loads/stores, so reuse them.
#	define unaligned_read16be read16be
#	define unaligned_read16le read16le
#	define unaligned_read32be read32be
#	define unaligned_read32le read32le
#	define unaligned_write16be write16be
#	define unaligned_write16le write16le
#	define unaligned_write32be write32be
#	define unaligned_write32le write32le

#else

// Portable byte-by-byte fallbacks: correct for any alignment and any
// host byte order.

// Read a big endian 16-bit integer from an unaligned buffer.
static inline uint16_t
unaligned_read16be(const uint8_t *buf)
{
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
}


// Read a little endian 16-bit integer from an unaligned buffer.
static inline uint16_t
unaligned_read16le(const uint8_t *buf)
{
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
}


// Read a big endian 32-bit integer from an unaligned buffer.
static inline uint32_t
unaligned_read32be(const uint8_t *buf)
{
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
}


// Read a little endian 32-bit integer from an unaligned buffer.
static inline uint32_t
unaligned_read32le(const uint8_t *buf)
{
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
}


// Write a 16-bit integer as big endian to an unaligned buffer.
static inline void
unaligned_write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
	return;
}


// Write a 16-bit integer as little endian to an unaligned buffer.
static inline void
unaligned_write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	return;
}


// Write a 32-bit integer as big endian to an unaligned buffer.
static inline void
unaligned_write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
	return;
}


// Write a 32-bit integer as little endian to an unaligned buffer.
static inline void
unaligned_write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	return;
}

#endif


// Bit scan reverse: return the index (0-31) of the highest set bit of n.
// The result is undefined if n == 0 (this matches the BSR instruction
// and __builtin_clz(), whose behavior is undefined for zero input).
static inline uint32_t
bsr32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n);

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
	// GCC >= 3.4 has __builtin_clz(), which gives good results on
	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
	// either plain BSR (so the XOR gets optimized away) or LZCNT and
	// XOR (if -march indicates that SSE4a instructions are supported).
	// Note: this branch requires int to be exactly 32 bits, since
	// __builtin_clz() counts relative to the width of unsigned int.
	return __builtin_clz(n) ^ 31U;

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	// Older GCC on x86: use the BSR instruction directly.
	uint32_t i;
	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER) && _MSC_VER >= 1400
	// MSVC isn't supported by tuklib, but since this code exists,
	// it doesn't hurt to have it here anyway.
	// NOTE(review): DWORD comes from the Windows headers, which this
	// file doesn't include — presumably the includer provides them
	// on MSVC; confirm before relying on this branch.
	uint32_t i;
	_BitScanReverse((DWORD *)&i, n);
	return i;

#else
	// Generic fallback: binary search for the highest set bit,
	// narrowing 16 -> 8 -> 4 -> 2 -> 1 bits.
	uint32_t i = 31;

	if ((n & UINT32_C(0xFFFF0000)) == 0) {
		n <<= 16;
		i = 15;
	}

	if ((n & UINT32_C(0xFF000000)) == 0) {
		n <<= 8;
		i -= 8;
	}

	if ((n & UINT32_C(0xF0000000)) == 0) {
		n <<= 4;
		i -= 4;
	}

	if ((n & UINT32_C(0xC0000000)) == 0) {
		n <<= 2;
		i -= 2;
	}

	if ((n & UINT32_C(0x80000000)) == 0)
		--i;

	return i;
#endif
}


// Count leading zeros: return the number (0-31) of zero bits above the
// highest set bit of n. The result is undefined if n == 0.
static inline uint32_t
clz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n) ^ 31U;

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
	return __builtin_clz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	// BSR gives the highest bit index; XOR with 31 converts it to the
	// leading zero count.
	uint32_t i;
	__asm__("bsrl %1, %0\n\t"
		"xorl $31, %0"
		: "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER) && _MSC_VER >= 1400
	uint32_t i;
	_BitScanReverse((DWORD *)&i, n);
	return i ^ 31U;

#else
	// Generic fallback: binary search, accumulating the zero count.
	uint32_t i = 0;

	if ((n & UINT32_C(0xFFFF0000)) == 0) {
		n <<= 16;
		i = 16;
	}

	if ((n & UINT32_C(0xFF000000)) == 0) {
		n <<= 8;
		i += 8;
	}

	if ((n & UINT32_C(0xF0000000)) == 0) {
		n <<= 4;
		i += 4;
	}

	if ((n & UINT32_C(0xC0000000)) == 0) {
		n <<= 2;
		i += 2;
	}

	if ((n & UINT32_C(0x80000000)) == 0)
		++i;

	return i;
#endif
}


// Count trailing zeros: return the number (0-31) of zero bits below the
// lowest set bit of n. The result is undefined if n == 0.
static inline uint32_t
ctz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_forward(n);

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
	// Unlike clz32() above, >= (not ==) is enough here: extra high
	// bits in a wider int don't change the trailing zero count of a
	// nonzero value.
	return __builtin_ctz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER) && _MSC_VER >= 1400
	uint32_t i;
	_BitScanForward((DWORD *)&i, n);
	return i;

#else
	// Generic fallback: binary search from the low end.
	uint32_t i = 0;

	if ((n & UINT32_C(0x0000FFFF)) == 0) {
		n >>= 16;
		i = 16;
	}

	if ((n & UINT32_C(0x000000FF)) == 0) {
		n >>= 8;
		i += 8;
	}

	if ((n & UINT32_C(0x0000000F)) == 0) {
		n >>= 4;
		i += 4;
	}

	if ((n & UINT32_C(0x00000003)) == 0) {
		n >>= 2;
		i += 2;
	}

	if ((n & UINT32_C(0x00000001)) == 0)
		++i;

	return i;
#endif
}

// Bit scan forward is simply an alias for ctz32().
#define bsf32 ctz32

#endif