///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
///
/// This file provides macros or functions to do some basic integer and bit
/// operations.
///
/// Native endian inline functions (XX = 16, 32, or 64):
///   - Unaligned native endian reads: readXXne(ptr)
///   - Unaligned native endian writes: writeXXne(ptr, num)
///   - Aligned native endian reads: aligned_readXXne(ptr)
///   - Aligned native endian writes: aligned_writeXXne(ptr, num)
///
/// Endianness-converting integer operations (these can be macros!)
/// (XX = 16, 32, or 64; Y = b or l):
///   - Byte swapping: bswapXX(num)
///   - Byte order conversions to/from native (byteswaps if Y isn't
///     the native endianness): convXXYe(num)
///   - Unaligned reads (16/32-bit only): readXXYe(ptr)
///   - Unaligned writes (16/32-bit only): writeXXYe(ptr, num)
///   - Aligned reads: aligned_readXXYe(ptr)
///   - Aligned writes: aligned_writeXXYe(ptr, num)
///
/// Since the above can be macros, the arguments should have no side effects
/// because they may be evaluated more than once.
///
/// Bit scan operations for non-zero 32-bit integers (inline functions):
///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
///   - Count leading zeros: clz32(num)
///   - Count trailing zeros: ctz32(num)
///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
///
/// The above bit scan operations return 0-31. If num is zero,
/// the result is undefined.
//
//  Authors:    Lasse Collin
//              Joachim Henke
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"
#include <string.h>

// Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
// and such functions.
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
#	include <immintrin.h>
#endif


///////////////////
// Byte swapping //
///////////////////

// Prefer a compiler builtin or an OS-provided byte swapping macro when one
// is available; the generic shift-and-mask fallbacks further below are used
// only when nothing better was detected at configure time.
#if defined(HAVE___BUILTIN_BSWAPXX)
	// GCC >= 4.8 and Clang
#	define bswap16(n) __builtin_bswap16(n)
#	define bswap32(n) __builtin_bswap32(n)
#	define bswap64(n) __builtin_bswap64(n)

#elif defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif

// Generic fallbacks. NOTE: These evaluate their argument multiple times,
// so the argument must not have side effects.
#ifndef bswap16
#	define bswap16(n) (uint16_t)( \
		  (((n) & 0x00FFU) << 8) \
		| (((n) & 0xFF00U) >> 8) \
	)
#endif

#ifndef bswap32
#	define bswap32(n) (uint32_t)( \
		  (((n) & UINT32_C(0x000000FF)) << 24) \
		| (((n) & UINT32_C(0x0000FF00)) << 8) \
		| (((n) & UINT32_C(0x00FF0000)) >> 8) \
		| (((n) & UINT32_C(0xFF000000)) >> 24) \
	)
#endif

#ifndef bswap64
#	define bswap64(n) (uint64_t)( \
		  (((n) & UINT64_C(0x00000000000000FF)) << 56) \
		| (((n) & UINT64_C(0x000000000000FF00)) << 40) \
		| (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
		| (((n) & UINT64_C(0x00000000FF000000)) << 8) \
		| (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
		| (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
		| (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
		| (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
	)
#endif

// Define conversion macros using the basic byte swapping macros.
// convXXbe/convXXle are identity casts when the native byte order already
// matches, and a byte swap otherwise.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
// is bad even if the uint8_pointer is properly aligned because this kind
// of casts break strict aliasing rules and result in undefined behavior.
// With unaligned pointers it's even worse: compilers may emit vector
// instructions that require aligned pointers even if non-vector
// instructions work with unaligned pointers.
//
// Using memcpy() is the standard compliant way to do unaligned access.
// Many modern compilers inline it so there is no function call overhead.
// For those compilers that don't handle the memcpy() method well, the
// old casting method (that violates strict aliasing) can be requested at
// build time. A third method, casting to a packed struct, would also be
// an option but isn't provided to keep things simpler (it's already a mess).
// Hopefully this is flexible enough in practice.

// Read a 16-bit integer in native byte order from a possibly
// unaligned buffer.
static inline uint16_t
read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint16_t *)buf;
#else
	uint16_t v;
	memcpy(&v, buf, sizeof(v));
	return v;
#endif
}


// Read a 32-bit integer in native byte order from a possibly
// unaligned buffer.
static inline uint32_t
read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint32_t *)buf;
#else
	uint32_t v;
	memcpy(&v, buf, sizeof(v));
	return v;
#endif
}


// Read a 64-bit integer in native byte order from a possibly
// unaligned buffer.
static inline uint64_t
read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint64_t *)buf;
#else
	uint64_t v;
	memcpy(&v, buf, sizeof(v));
	return v;
#endif
}


// Write a 16-bit integer in native byte order to a possibly
// unaligned buffer.
static inline void
write16ne(uint8_t *buf, uint16_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint16_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


// Write a 32-bit integer in native byte order to a possibly
// unaligned buffer.
static inline void
write32ne(uint8_t *buf, uint32_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint32_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


// Write a 64-bit integer in native byte order to a possibly
// unaligned buffer.
static inline void
write64ne(uint8_t *buf, uint64_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint64_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


// Read a big endian 16-bit integer from a possibly unaligned buffer.
static inline uint16_t
read16be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	return conv16be(read16ne(buf));
#else
	return (uint16_t)(((uint16_t)buf[0] << 8) | buf[1]);
#endif
}


// Read a little endian 16-bit integer from a possibly unaligned buffer.
static inline uint16_t
read16le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	return conv16le(read16ne(buf));
#else
	return (uint16_t)(buf[0] | ((uint16_t)buf[1] << 8));
#endif
}


// Read a big endian 32-bit integer from a possibly unaligned buffer.
static inline uint32_t
read32be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	return conv32be(read32ne(buf));
#else
	return ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16)
			| ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];
#endif
}


// Read a little endian 32-bit integer from a possibly unaligned buffer.
static inline uint32_t
read32le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	return conv32le(read32ne(buf));
#else
	return (uint32_t)buf[0] | ((uint32_t)buf[1] << 8)
			| ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24);
#endif
}


// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible.
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16be(buf, num) write16ne(buf, conv16be(num))
#	define write32be(buf, num) write32ne(buf, conv32be(num))
#endif

#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16le(buf, num) write16ne(buf, conv16le(num))
#	define write32le(buf, num) write32ne(buf, conv32le(num))
#endif


// Byte-by-byte fallbacks for the cases where a fast native-endian store
// plus byte swap cannot be used safely.
#ifndef write16be
static inline void
write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)(num & 0xFF);
	return;
}
#endif


#ifndef write16le
static inline void
write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num & 0xFF);
	buf[1] = (uint8_t)(num >> 8);
	return;
}
#endif


#ifndef write32be
static inline void
write32be(uint8_t *buf, uint32_t num)
{
	// Most significant byte first.
	for (unsigned i = 0; i < 4; ++i)
		buf[i] = (uint8_t)(num >> (24 - 8 * i));

	return;
}
#endif


#ifndef write32le
static inline void
write32le(uint8_t *buf, uint32_t num)
{
	// Least significant byte first.
	for (unsigned i = 0; i < 4; ++i)
		buf[i] = (uint8_t)(num >> (8 * i));

	return;
}
#endif
408360523Sdelphij 409360523Sdelphij////////////////////////////// 410360523Sdelphij// Aligned reads and writes // 411360523Sdelphij////////////////////////////// 412360523Sdelphij 413360523Sdelphij// Separate functions for aligned reads and writes are provided since on 414360523Sdelphij// strict-align archs aligned access is much faster than unaligned access. 415360523Sdelphij// 416360523Sdelphij// Just like in the unaligned case, memcpy() is needed to avoid 417360523Sdelphij// strict aliasing violations. However, on archs that don't support 418360523Sdelphij// unaligned access the compiler cannot know that the pointers given 419360523Sdelphij// to memcpy() are aligned which results in slow code. As of C11 there is 420360523Sdelphij// no standard way to tell the compiler that we know that the address is 421360523Sdelphij// aligned but some compilers have language extensions to do that. With 422360523Sdelphij// such language extensions the memcpy() method gives excellent results. 423360523Sdelphij// 424360523Sdelphij// What to do on a strict-align system when no known language extentensions 425360523Sdelphij// are available? Falling back to byte-by-byte access would be safe but ruin 426360523Sdelphij// optimizations that have been made specifically with aligned access in mind. 427360523Sdelphij// As a compromise, aligned reads will fall back to non-compliant type punning 428360523Sdelphij// but aligned writes will be byte-by-byte, that is, fast reads are preferred 429360523Sdelphij// over fast writes. This obviously isn't great but hopefully it's a working 430360523Sdelphij// compromise for now. 431360523Sdelphij// 432360523Sdelphij// __builtin_assume_aligned is support by GCC >= 4.7 and clang >= 3.6. 
433360523Sdelphij#ifdef HAVE___BUILTIN_ASSUME_ALIGNED 434360523Sdelphij# define tuklib_memcpy_aligned(dest, src, size) \ 435360523Sdelphij memcpy(dest, __builtin_assume_aligned(src, size), size) 436360523Sdelphij#else 437360523Sdelphij# define tuklib_memcpy_aligned(dest, src, size) \ 438360523Sdelphij memcpy(dest, src, size) 439360523Sdelphij# ifndef TUKLIB_FAST_UNALIGNED_ACCESS 440360523Sdelphij# define TUKLIB_USE_UNSAFE_ALIGNED_READS 1 441360523Sdelphij# endif 442207753Smm#endif 443207753Smm 444207753Smm 445360523Sdelphijstatic inline uint16_t 446360523Sdelphijaligned_read16ne(const uint8_t *buf) 447360523Sdelphij{ 448360523Sdelphij#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ 449360523Sdelphij || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) 450360523Sdelphij return *(const uint16_t *)buf; 451360523Sdelphij#else 452360523Sdelphij uint16_t num; 453360523Sdelphij tuklib_memcpy_aligned(&num, buf, sizeof(num)); 454360523Sdelphij return num; 455360523Sdelphij#endif 456360523Sdelphij} 457360523Sdelphij 458360523Sdelphij 459207753Smmstatic inline uint32_t 460360523Sdelphijaligned_read32ne(const uint8_t *buf) 461360523Sdelphij{ 462360523Sdelphij#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ 463360523Sdelphij || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) 464360523Sdelphij return *(const uint32_t *)buf; 465360523Sdelphij#else 466360523Sdelphij uint32_t num; 467360523Sdelphij tuklib_memcpy_aligned(&num, buf, sizeof(num)); 468360523Sdelphij return num; 469360523Sdelphij#endif 470360523Sdelphij} 471360523Sdelphij 472360523Sdelphij 473360523Sdelphijstatic inline uint64_t 474360523Sdelphijaligned_read64ne(const uint8_t *buf) 475360523Sdelphij{ 476360523Sdelphij#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \ 477360523Sdelphij || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS) 478360523Sdelphij return *(const uint64_t *)buf; 479360523Sdelphij#else 480360523Sdelphij uint64_t num; 481360523Sdelphij tuklib_memcpy_aligned(&num, buf, sizeof(num)); 482360523Sdelphij return num; 483360523Sdelphij#endif 
484360523Sdelphij} 485360523Sdelphij 486360523Sdelphij 487360523Sdelphijstatic inline void 488360523Sdelphijaligned_write16ne(uint8_t *buf, uint16_t num) 489360523Sdelphij{ 490360523Sdelphij#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING 491360523Sdelphij *(uint16_t *)buf = num; 492360523Sdelphij#else 493360523Sdelphij tuklib_memcpy_aligned(buf, &num, sizeof(num)); 494360523Sdelphij#endif 495360523Sdelphij return; 496360523Sdelphij} 497360523Sdelphij 498360523Sdelphij 499360523Sdelphijstatic inline void 500360523Sdelphijaligned_write32ne(uint8_t *buf, uint32_t num) 501360523Sdelphij{ 502360523Sdelphij#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING 503360523Sdelphij *(uint32_t *)buf = num; 504360523Sdelphij#else 505360523Sdelphij tuklib_memcpy_aligned(buf, &num, sizeof(num)); 506360523Sdelphij#endif 507360523Sdelphij return; 508360523Sdelphij} 509360523Sdelphij 510360523Sdelphij 511360523Sdelphijstatic inline void 512360523Sdelphijaligned_write64ne(uint8_t *buf, uint64_t num) 513360523Sdelphij{ 514360523Sdelphij#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING 515360523Sdelphij *(uint64_t *)buf = num; 516360523Sdelphij#else 517360523Sdelphij tuklib_memcpy_aligned(buf, &num, sizeof(num)); 518360523Sdelphij#endif 519360523Sdelphij return; 520360523Sdelphij} 521360523Sdelphij 522360523Sdelphij 523360523Sdelphijstatic inline uint16_t 524360523Sdelphijaligned_read16be(const uint8_t *buf) 525360523Sdelphij{ 526360523Sdelphij uint16_t num = aligned_read16ne(buf); 527360523Sdelphij return conv16be(num); 528360523Sdelphij} 529360523Sdelphij 530360523Sdelphij 531360523Sdelphijstatic inline uint16_t 532360523Sdelphijaligned_read16le(const uint8_t *buf) 533360523Sdelphij{ 534360523Sdelphij uint16_t num = aligned_read16ne(buf); 535360523Sdelphij return conv16le(num); 536360523Sdelphij} 537360523Sdelphij 538360523Sdelphij 539360523Sdelphijstatic inline uint32_t 540360523Sdelphijaligned_read32be(const uint8_t *buf) 541360523Sdelphij{ 542360523Sdelphij uint32_t num = aligned_read32ne(buf); 543360523Sdelphij 
return conv32be(num); 544360523Sdelphij} 545360523Sdelphij 546360523Sdelphij 547360523Sdelphijstatic inline uint32_t 548360523Sdelphijaligned_read32le(const uint8_t *buf) 549360523Sdelphij{ 550360523Sdelphij uint32_t num = aligned_read32ne(buf); 551360523Sdelphij return conv32le(num); 552360523Sdelphij} 553360523Sdelphij 554360523Sdelphij 555360523Sdelphijstatic inline uint64_t 556360523Sdelphijaligned_read64be(const uint8_t *buf) 557360523Sdelphij{ 558360523Sdelphij uint64_t num = aligned_read64ne(buf); 559360523Sdelphij return conv64be(num); 560360523Sdelphij} 561360523Sdelphij 562360523Sdelphij 563360523Sdelphijstatic inline uint64_t 564360523Sdelphijaligned_read64le(const uint8_t *buf) 565360523Sdelphij{ 566360523Sdelphij uint64_t num = aligned_read64ne(buf); 567360523Sdelphij return conv64le(num); 568360523Sdelphij} 569360523Sdelphij 570360523Sdelphij 571360523Sdelphij// These need to be macros like in the unaligned case. 572360523Sdelphij#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num)) 573360523Sdelphij#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num)) 574360523Sdelphij#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num)) 575360523Sdelphij#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num)) 576360523Sdelphij#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num)) 577360523Sdelphij#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num)) 578360523Sdelphij 579360523Sdelphij 580360523Sdelphij//////////////////// 581360523Sdelphij// Bit operations // 582360523Sdelphij//////////////////// 583360523Sdelphij 584360523Sdelphijstatic inline uint32_t 585207753Smmbsr32(uint32_t n) 586207753Smm{ 587207753Smm // Check for ICC first, since it tends to define __GNUC__ too. 
588207753Smm#if defined(__INTEL_COMPILER) 589207753Smm return _bit_scan_reverse(n); 590207753Smm 591207753Smm#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX 592207753Smm // GCC >= 3.4 has __builtin_clz(), which gives good results on 593207753Smm // multiple architectures. On x86, __builtin_clz() ^ 31U becomes 594207753Smm // either plain BSR (so the XOR gets optimized away) or LZCNT and 595207753Smm // XOR (if -march indicates that SSE4a instructions are supported). 596360523Sdelphij return (uint32_t)__builtin_clz(n) ^ 31U; 597207753Smm 598207753Smm#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 599207753Smm uint32_t i; 600207753Smm __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n)); 601207753Smm return i; 602207753Smm 603360523Sdelphij#elif defined(_MSC_VER) 604360523Sdelphij unsigned long i; 605360523Sdelphij _BitScanReverse(&i, n); 606207753Smm return i; 607207753Smm 608207753Smm#else 609207753Smm uint32_t i = 31; 610207753Smm 611360523Sdelphij if ((n & 0xFFFF0000) == 0) { 612207753Smm n <<= 16; 613207753Smm i = 15; 614207753Smm } 615207753Smm 616360523Sdelphij if ((n & 0xFF000000) == 0) { 617207753Smm n <<= 8; 618207753Smm i -= 8; 619207753Smm } 620207753Smm 621360523Sdelphij if ((n & 0xF0000000) == 0) { 622207753Smm n <<= 4; 623207753Smm i -= 4; 624207753Smm } 625207753Smm 626360523Sdelphij if ((n & 0xC0000000) == 0) { 627207753Smm n <<= 2; 628207753Smm i -= 2; 629207753Smm } 630207753Smm 631360523Sdelphij if ((n & 0x80000000) == 0) 632207753Smm --i; 633207753Smm 634207753Smm return i; 635207753Smm#endif 636207753Smm} 637207753Smm 638207753Smm 639207753Smmstatic inline uint32_t 640207753Smmclz32(uint32_t n) 641207753Smm{ 642207753Smm#if defined(__INTEL_COMPILER) 643207753Smm return _bit_scan_reverse(n) ^ 31U; 644207753Smm 645207753Smm#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX 646360523Sdelphij return (uint32_t)__builtin_clz(n); 647207753Smm 648207753Smm#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 
649207753Smm uint32_t i; 650207753Smm __asm__("bsrl %1, %0\n\t" 651207753Smm "xorl $31, %0" 652207753Smm : "=r" (i) : "rm" (n)); 653207753Smm return i; 654207753Smm 655360523Sdelphij#elif defined(_MSC_VER) 656360523Sdelphij unsigned long i; 657360523Sdelphij _BitScanReverse(&i, n); 658207753Smm return i ^ 31U; 659207753Smm 660207753Smm#else 661207753Smm uint32_t i = 0; 662207753Smm 663360523Sdelphij if ((n & 0xFFFF0000) == 0) { 664207753Smm n <<= 16; 665207753Smm i = 16; 666207753Smm } 667207753Smm 668360523Sdelphij if ((n & 0xFF000000) == 0) { 669207753Smm n <<= 8; 670207753Smm i += 8; 671207753Smm } 672207753Smm 673360523Sdelphij if ((n & 0xF0000000) == 0) { 674207753Smm n <<= 4; 675207753Smm i += 4; 676207753Smm } 677207753Smm 678360523Sdelphij if ((n & 0xC0000000) == 0) { 679207753Smm n <<= 2; 680207753Smm i += 2; 681207753Smm } 682207753Smm 683360523Sdelphij if ((n & 0x80000000) == 0) 684207753Smm ++i; 685207753Smm 686207753Smm return i; 687207753Smm#endif 688207753Smm} 689207753Smm 690207753Smm 691207753Smmstatic inline uint32_t 692207753Smmctz32(uint32_t n) 693207753Smm{ 694207753Smm#if defined(__INTEL_COMPILER) 695207753Smm return _bit_scan_forward(n); 696207753Smm 697207753Smm#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX 698360523Sdelphij return (uint32_t)__builtin_ctz(n); 699207753Smm 700207753Smm#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 701207753Smm uint32_t i; 702207753Smm __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); 703207753Smm return i; 704207753Smm 705360523Sdelphij#elif defined(_MSC_VER) 706360523Sdelphij unsigned long i; 707360523Sdelphij _BitScanForward(&i, n); 708207753Smm return i; 709207753Smm 710207753Smm#else 711207753Smm uint32_t i = 0; 712207753Smm 713360523Sdelphij if ((n & 0x0000FFFF) == 0) { 714207753Smm n >>= 16; 715207753Smm i = 16; 716207753Smm } 717207753Smm 718360523Sdelphij if ((n & 0x000000FF) == 0) { 719207753Smm n >>= 8; 720207753Smm i += 8; 721207753Smm } 722207753Smm 723360523Sdelphij if 
((n & 0x0000000F) == 0) { 724207753Smm n >>= 4; 725207753Smm i += 4; 726207753Smm } 727207753Smm 728360523Sdelphij if ((n & 0x00000003) == 0) { 729207753Smm n >>= 2; 730207753Smm i += 2; 731207753Smm } 732207753Smm 733360523Sdelphij if ((n & 0x00000001) == 0) 734207753Smm ++i; 735207753Smm 736207753Smm return i; 737207753Smm#endif 738207753Smm} 739207753Smm 740207753Smm#define bsf32 ctz32 741207753Smm 742207753Smm#endif 743