#include <math.h>
#include <stdint.h>

/*
 * fmod - floating-point remainder of x / y, computed exactly with integer
 * arithmetic on the bit patterns of the operands.
 *
 * x: dividend.
 * y: divisor.
 *
 * Returns:
 *   - NaN when y is zero, y is NaN, or x is infinite or NaN;
 *   - x unchanged when |x| < |y| (this includes finite x with infinite y);
 *   - a zero carrying the sign of x when the remainder is exactly zero;
 *   - otherwise the remainder, whose magnitude is less than |y| and whose
 *     sign is the sign of x.
 *
 * The code assumes double uses the 64-bit layout of 1 sign bit, 11 exponent
 * bits and 52 mantissa bits, as implied by the shifts and masks below.
 */
double fmod(double x, double y)
{
	union { double f; uint64_t i; } ux = {x}, uy = {y};
	int ex = ux.i >> 52 & 0x7ff;   /* biased exponent of x */
	int ey = uy.i >> 52 & 0x7ff;   /* biased exponent of y */
	int sx = ux.i >> 63;           /* sign bit of x, re-applied to the result */
	uint64_t i;

	/*
	 * In the following, uxi should be ux.i, but then gcc wrongly adds
	 * float load/store to inner loops, ruining performance and code size.
	 */
	uint64_t uxi = ux.i;

	/* y == +-0, y is NaN, or x is Inf/NaN: the expression evaluates to NaN. */
	if (uy.i << 1 == 0 || isnan(y) || ex == 0x7ff)
		return (x * y) / (x * y);

	/* Shifting out the sign bit compares magnitudes: |x| <= |y|. */
	if (uxi << 1 <= uy.i << 1) {
		if (uxi << 1 == uy.i << 1)
			return 0 * x;  /* |x| == |y|: zero with the sign of x */
		return x;
	}

	/* normalize x and y */
	if (!ex) {
		/* Subnormal x: count leading zeros of the mantissa into ex (<= 0). */
		for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1)
			;
		uxi <<= -ex + 1;
	} else {
		/* Normal x: keep the mantissa and make the implicit leading 1 explicit. */
		uxi &= -1ULL >> 12;
		uxi |= 1ULL << 52;
	}
	if (!ey) {
		for (i = uy.i << 12; i >> 63 == 0; ey--, i <<= 1)
			;
		uy.i <<= -ey + 1;
	} else {
		uy.i &= -1ULL >> 12;
		uy.i |= 1ULL << 52;
	}

	/* x mod y: one shift-and-subtract step per unit of exponent difference */
	for (; ex > ey; ex--) {
		i = uxi - uy.i;
		if (i >> 63 == 0) {        /* no borrow, i.e. uxi >= uy.i */
			if (i == 0)
				return 0 * x;
			uxi = i;
		}
		uxi <<= 1;
	}
	/* Final step at equal exponents (no shift afterwards). */
	i = uxi - uy.i;
	if (i >> 63 == 0) {
		if (i == 0)
			return 0 * x;
		uxi = i;
	}
	/* Renormalize: bring the leading 1 of the remainder back to bit 52. */
	for (; uxi >> 52 == 0; uxi <<= 1, ex--)
		;

	/* scale result */
	if (ex > 0) {
		/* Normal result: drop the explicit leading 1, insert the exponent. */
		uxi -= 1ULL << 52;
		uxi |= (uint64_t)ex << 52;
	} else {
		/* Subnormal result: shift the mantissa down into place. */
		uxi >>= -ex + 1;
	}
	uxi |= (uint64_t)sx << 63;
	ux.i = uxi;
	return ux.f;
}