src/ryu/f2s.cpp

38032Speter//===----------------------------------------------------------------------===//
64565Sgshapiro//
64565Sgshapiro// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
38032Speter// See https://llvm.org/LICENSE.txt for license information.
38032Speter// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
38032Speter//
38032Speter//===----------------------------------------------------------------------===//
38032Speter
38032Speter// Copyright (c) Microsoft Corporation.
38032Speter// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
38032Speter
38032Speter// Copyright 2018 Ulf Adams
38032Speter// Copyright (c) Microsoft Corporation. All rights reserved.
38032Speter
71348Sgshapiro// Boost Software License - Version 1.0 - August 17th, 2003
64565Sgshapiro
38032Speter// Permission is hereby granted, free of charge, to any person or organization
64565Sgshapiro// obtaining a copy of the software and accompanying documentation covered by
64565Sgshapiro// this license (the "Software") to use, reproduce, display, distribute,
64565Sgshapiro// execute, and transmit the Software, and to prepare derivative works of the
64565Sgshapiro// Software, and to permit third-parties to whom the Software is furnished to
64565Sgshapiro// do so, all subject to the following:
64565Sgshapiro
64565Sgshapiro// The copyright notices in the Software and this entire statement, including
64565Sgshapiro// the above license grant, this restriction and the following disclaimer,
64565Sgshapiro// must be included in all copies of the Software, in whole or in part, and
38032Speter// all derivative works of the Software, unless such copies or derivative
38032Speter// works are solely in the form of machine-executable object code generated by
64565Sgshapiro// a source language processor.
64565Sgshapiro
64565Sgshapiro// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
64565Sgshapiro// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
64565Sgshapiro// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
64565Sgshapiro// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
64565Sgshapiro// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
38032Speter// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
38032Speter// DEALINGS IN THE SOFTWARE.
38032Speter
38032Speter// Avoid formatting to keep the changes with the original code minimal.
38032Speter// clang-format off
38032Speter
38032Speter#include <__assert>
38032Speter#include <__config>
38032Speter#include <charconv>
38032Speter
38032Speter#include "include/ryu/common.h"
38032Speter#include "include/ryu/d2fixed.h"
38032Speter#include "include/ryu/d2s_intrinsics.h"
38032Speter#include "include/ryu/digit_table.h"
38032Speter#include "include/ryu/f2s.h"
38032Speter#include "include/ryu/ryu.h"
38032Speter
38032Speter_LIBCPP_BEGIN_NAMESPACE_STD
38032Speter
// Layout of an IEEE-754 binary32 value: width of the stored fraction field,
// width of the biased exponent field, and the exponent bias.
inline constexpr int __FLOAT_MANTISSA_BITS = 23;
inline constexpr int __FLOAT_EXPONENT_BITS = 8;
inline constexpr int __FLOAT_BIAS = 127;

// Fixed-point multipliers consumed by __mulPow5InvDivPow2(); the index is the decimal exponent __q.
// Entry 0 is 2^59 + 1. __FLOAT_POW5_INV_BITCOUNT enters the shift amount computed in __f2d().
inline constexpr int __FLOAT_POW5_INV_BITCOUNT = 59;
inline constexpr uint64_t __FLOAT_POW5_INV_SPLIT[31] = {
  576460752303423489u, 461168601842738791u, 368934881474191033u,
  295147905179352826u, 472236648286964522u, 377789318629571618u,
  302231454903657294u, 483570327845851670u, 386856262276681336u,
  309485009821345069u, 495176015714152110u, 396140812571321688u,
  316912650057057351u, 507060240091291761u, 405648192073033409u,
  324518553658426727u, 519229685853482763u, 415383748682786211u,
  332306998946228969u, 531691198313966350u, 425352958651173080u,
  340282366920938464u, 544451787073501542u, 435561429658801234u,
  348449143727040987u, 557518629963265579u, 446014903970612463u,
  356811923176489971u, 570899077082383953u, 456719261665907162u,
  365375409332725730u
};

// Fixed-point multipliers consumed by __mulPow5divPow2(); the index is the power-of-5 exponent __i.
// Entry 0 is 2^60. __FLOAT_POW5_BITCOUNT enters the shift amount computed in __f2d().
inline constexpr int __FLOAT_POW5_BITCOUNT = 61;
inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
  1152921504606846976u, 1441151880758558720u, 1801439850948198400u,
  2251799813685248000u, 1407374883553280000u, 1759218604441600000u,
  2199023255552000000u, 1374389534720000000u, 1717986918400000000u,
  2147483648000000000u, 1342177280000000000u, 1677721600000000000u,
  2097152000000000000u, 1310720000000000000u, 1638400000000000000u,
  2048000000000000000u, 1280000000000000000u, 1600000000000000000u,
  2000000000000000000u, 1250000000000000000u, 1562500000000000000u,
  1953125000000000000u, 1220703125000000000u, 1525878906250000000u,
  1907348632812500000u, 1192092895507812500u, 1490116119384765625u,
  1862645149230957031u, 1164153218269348144u, 1455191522836685180u,
  1818989403545856475u, 2273736754432320594u, 1421085471520200371u,
  1776356839400250464u, 2220446049250313080u, 1387778780781445675u,
  1734723475976807094u, 2168404344971008868u, 1355252715606880542u,
  1694065894508600678u, 2117582368135750847u, 1323488980084844279u,
  1654361225106055349u, 2067951531382569187u, 1292469707114105741u,
  1615587133892632177u, 2019483917365790221u
};
38032Speter
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint32_t __value) {
38032Speter  uint32_t __count = 0;
38032Speter  for (;;) {
38032Speter    _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
38032Speter    const uint32_t __q = __value / 5;
38032Speter    const uint32_t __r = __value % 5;
38032Speter    if (__r != 0) {
38032Speter      break;
38032Speter    }
38032Speter    __value = __q;
38032Speter    ++__count;
38032Speter  }
38032Speter  return __count;
38032Speter}
38032Speter
38032Speter// Returns true if __value is divisible by 5^__p.
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint32_t __value, const uint32_t __p) {
38032Speter  return __pow5Factor(__value) >= __p;
38032Speter}
38032Speter
38032Speter// Returns true if __value is divisible by 2^__p.
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
38032Speter  _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
38032Speter  _LIBCPP_ASSERT_INTERNAL(__p < 32, "");
38032Speter  // __builtin_ctz doesn't appear to be faster here.
64565Sgshapiro  return (__value & ((1u << __p) - 1)) == 0;
64565Sgshapiro}
64565Sgshapiro
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulShift(const uint32_t __m, const uint64_t __factor, const int32_t __shift) {
38032Speter  _LIBCPP_ASSERT_INTERNAL(__shift > 32, "");
64565Sgshapiro
64565Sgshapiro  // The casts here help MSVC to avoid calls to the __allmul library
38032Speter  // function.
38032Speter  const uint32_t __factorLo = static_cast<uint32_t>(__factor);
38032Speter  const uint32_t __factorHi = static_cast<uint32_t>(__factor >> 32);
38032Speter  const uint64_t __bits0 = static_cast<uint64_t>(__m) * __factorLo;
38032Speter  const uint64_t __bits1 = static_cast<uint64_t>(__m) * __factorHi;
38032Speter
38032Speter#ifndef _LIBCPP_64_BIT
38032Speter  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
38032Speter  // need the upper 32 bits of the result and the shift value is > 32.
38032Speter  const uint32_t __bits0Hi = static_cast<uint32_t>(__bits0 >> 32);
38032Speter  uint32_t __bits1Lo = static_cast<uint32_t>(__bits1);
38032Speter  uint32_t __bits1Hi = static_cast<uint32_t>(__bits1 >> 32);
38032Speter  __bits1Lo += __bits0Hi;
38032Speter  __bits1Hi += (__bits1Lo < __bits0Hi);
38032Speter  const int32_t __s = __shift - 32;
38032Speter  return (__bits1Hi << (32 - __s)) | (__bits1Lo >> __s);
38032Speter#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv
38032Speter  const uint64_t __sum = (__bits0 >> 32) + __bits1;
38032Speter  const uint64_t __shiftedSum = __sum >> (__shift - 32);
38032Speter  _LIBCPP_ASSERT_INTERNAL(__shiftedSum <= UINT32_MAX, "");
38032Speter  return static_cast<uint32_t>(__shiftedSum);
38032Speter#endif // ^^^ 64-bit ^^^
38032Speter}
38032Speter
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulPow5InvDivPow2(const uint32_t __m, const uint32_t __q, const int32_t __j) {
38032Speter  return __mulShift(__m, __FLOAT_POW5_INV_SPLIT[__q], __j);
38032Speter}
38032Speter
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulPow5divPow2(const uint32_t __m, const uint32_t __i, const int32_t __j) {
38032Speter  return __mulShift(__m, __FLOAT_POW5_SPLIT[__i], __j);
38032Speter}
38032Speter
// Decimal floating-point value: __mantissa * 10^__exponent.
// Produced by __f2d() and consumed by __to_chars().
struct __floating_decimal_32 {
  uint32_t __mantissa; // decimal significand
  int32_t __exponent;  // power of ten applied to __mantissa
};
38032Speter
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline __floating_decimal_32 __f2d(const uint32_t __ieeeMantissa, const uint32_t __ieeeExponent) {
38032Speter  int32_t __e2;
38032Speter  uint32_t __m2;
38032Speter  if (__ieeeExponent == 0) {
38032Speter    // We subtract 2 so that the bounds computation has 2 additional bits.
38032Speter    __e2 = 1 - __FLOAT_BIAS - __FLOAT_MANTISSA_BITS - 2;
38032Speter    __m2 = __ieeeMantissa;
38032Speter  } else {
38032Speter    __e2 = static_cast<int32_t>(__ieeeExponent) - __FLOAT_BIAS - __FLOAT_MANTISSA_BITS - 2;
64565Sgshapiro    __m2 = (1u << __FLOAT_MANTISSA_BITS) | __ieeeMantissa;
64565Sgshapiro  }
64565Sgshapiro  const bool __even = (__m2 & 1) == 0;
64565Sgshapiro  const bool __acceptBounds = __even;
64565Sgshapiro
38032Speter  // Step 2: Determine the interval of valid decimal representations.
64565Sgshapiro  const uint32_t __mv = 4 * __m2;
38032Speter  const uint32_t __mp = 4 * __m2 + 2;
64565Sgshapiro  // Implicit bool -> int conversion. True is 1, false is 0.
64565Sgshapiro  const uint32_t __mmShift = __ieeeMantissa != 0 || __ieeeExponent <= 1;
38032Speter  const uint32_t __mm = 4 * __m2 - 1 - __mmShift;
38032Speter
38032Speter  // Step 3: Convert to a decimal power base using 64-bit arithmetic.
38032Speter  uint32_t __vr, __vp, __vm;
38032Speter  int32_t __e10;
38032Speter  bool __vmIsTrailingZeros = false;
38032Speter  bool __vrIsTrailingZeros = false;
38032Speter  uint8_t __lastRemovedDigit = 0;
38032Speter  if (__e2 >= 0) {
38032Speter    const uint32_t __q = __log10Pow2(__e2);
38032Speter    __e10 = static_cast<int32_t>(__q);
38032Speter    const int32_t __k = __FLOAT_POW5_INV_BITCOUNT + __pow5bits(static_cast<int32_t>(__q)) - 1;
38032Speter    const int32_t __i = -__e2 + static_cast<int32_t>(__q) + __k;
38032Speter    __vr = __mulPow5InvDivPow2(__mv, __q, __i);
38032Speter    __vp = __mulPow5InvDivPow2(__mp, __q, __i);
38032Speter    __vm = __mulPow5InvDivPow2(__mm, __q, __i);
38032Speter    if (__q != 0 && (__vp - 1) / 10 <= __vm / 10) {
38032Speter      // We need to know one removed digit even if we are not going to loop below. We could use
38032Speter      // __q = X - 1 above, except that would require 33 bits for the result, and we've found that
38032Speter      // 32-bit arithmetic is faster even on 64-bit machines.
38032Speter      const int32_t __l = __FLOAT_POW5_INV_BITCOUNT + __pow5bits(static_cast<int32_t>(__q - 1)) - 1;
38032Speter      __lastRemovedDigit = static_cast<uint8_t>(__mulPow5InvDivPow2(__mv, __q - 1,
38032Speter        -__e2 + static_cast<int32_t>(__q) - 1 + __l) % 10);
38032Speter    }
38032Speter    if (__q <= 9) {
64565Sgshapiro      // The largest power of 5 that fits in 24 bits is 5^10, but __q <= 9 seems to be safe as well.
38032Speter      // Only one of __mp, __mv, and __mm can be a multiple of 5, if any.
38032Speter      if (__mv % 5 == 0) {
38032Speter        __vrIsTrailingZeros = __multipleOfPowerOf5(__mv, __q);
38032Speter      } else if (__acceptBounds) {
38032Speter        __vmIsTrailingZeros = __multipleOfPowerOf5(__mm, __q);
38032Speter      } else {
38032Speter        __vp -= __multipleOfPowerOf5(__mp, __q);
38032Speter      }
38032Speter    }
38032Speter  } else {
38032Speter    const uint32_t __q = __log10Pow5(-__e2);
38032Speter    __e10 = static_cast<int32_t>(__q) + __e2;
38032Speter    const int32_t __i = -__e2 - static_cast<int32_t>(__q);
38032Speter    const int32_t __k = __pow5bits(__i) - __FLOAT_POW5_BITCOUNT;
38032Speter    int32_t __j = static_cast<int32_t>(__q) - __k;
38032Speter    __vr = __mulPow5divPow2(__mv, static_cast<uint32_t>(__i), __j);
38032Speter    __vp = __mulPow5divPow2(__mp, static_cast<uint32_t>(__i), __j);
64565Sgshapiro    __vm = __mulPow5divPow2(__mm, static_cast<uint32_t>(__i), __j);
38032Speter    if (__q != 0 && (__vp - 1) / 10 <= __vm / 10) {
38032Speter      __j = static_cast<int32_t>(__q) - 1 - (__pow5bits(__i + 1) - __FLOAT_POW5_BITCOUNT);
38032Speter      __lastRemovedDigit = static_cast<uint8_t>(__mulPow5divPow2(__mv, static_cast<uint32_t>(__i + 1), __j) % 10);
38032Speter    }
64565Sgshapiro    if (__q <= 1) {
64565Sgshapiro      // {__vr,__vp,__vm} is trailing zeros if {__mv,__mp,__mm} has at least __q trailing 0 bits.
64565Sgshapiro      // __mv = 4 * __m2, so it always has at least two trailing 0 bits.
38032Speter      __vrIsTrailingZeros = true;
64565Sgshapiro      if (__acceptBounds) {
38032Speter        // __mm = __mv - 1 - __mmShift, so it has 1 trailing 0 bit iff __mmShift == 1.
38032Speter        __vmIsTrailingZeros = __mmShift == 1;
38032Speter      } else {
38032Speter        // __mp = __mv + 2, so it always has at least one trailing 0 bit.
38032Speter        --__vp;
38032Speter      }
38032Speter    } else if (__q < 31) { // TRANSITION(ulfjack): Use a tighter bound here.
38032Speter      __vrIsTrailingZeros = __multipleOfPowerOf2(__mv, __q - 1);
38032Speter    }
38032Speter  }
64565Sgshapiro
38032Speter  // Step 4: Find the shortest decimal representation in the interval of valid representations.
64565Sgshapiro  int32_t __removed = 0;
38032Speter  uint32_t _Output;
38032Speter  if (__vmIsTrailingZeros || __vrIsTrailingZeros) {
64565Sgshapiro    // General case, which happens rarely (~4.0%).
38032Speter    while (__vp / 10 > __vm / 10) {
38032Speter#ifdef __clang__ // TRANSITION, LLVM-23106
64565Sgshapiro      __vmIsTrailingZeros &= __vm - (__vm / 10) * 10 == 0;
38032Speter#else
64565Sgshapiro      __vmIsTrailingZeros &= __vm % 10 == 0;
38032Speter#endif
38032Speter      __vrIsTrailingZeros &= __lastRemovedDigit == 0;
38032Speter      __lastRemovedDigit = static_cast<uint8_t>(__vr % 10);
38032Speter      __vr /= 10;
38032Speter      __vp /= 10;
38032Speter      __vm /= 10;
38032Speter      ++__removed;
38032Speter    }
38032Speter    if (__vmIsTrailingZeros) {
38032Speter      while (__vm % 10 == 0) {
38032Speter        __vrIsTrailingZeros &= __lastRemovedDigit == 0;
38032Speter        __lastRemovedDigit = static_cast<uint8_t>(__vr % 10);
38032Speter        __vr /= 10;
38032Speter        __vp /= 10;
38032Speter        __vm /= 10;
38032Speter        ++__removed;
38032Speter      }
38032Speter    }
38032Speter    if (__vrIsTrailingZeros && __lastRemovedDigit == 5 && __vr % 2 == 0) {
38032Speter      // Round even if the exact number is .....50..0.
38032Speter      __lastRemovedDigit = 4;
38032Speter    }
38032Speter    // We need to take __vr + 1 if __vr is outside bounds or we need to round up.
38032Speter    _Output = __vr + ((__vr == __vm && (!__acceptBounds || !__vmIsTrailingZeros)) || __lastRemovedDigit >= 5);
38032Speter  } else {
38032Speter    // Specialized for the common case (~96.0%). Percentages below are relative to this.
38032Speter    // Loop iterations below (approximately):
38032Speter    // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
38032Speter    while (__vp / 10 > __vm / 10) {
38032Speter      __lastRemovedDigit = static_cast<uint8_t>(__vr % 10);
71348Sgshapiro      __vr /= 10;
38032Speter      __vp /= 10;
38032Speter      __vm /= 10;
71348Sgshapiro      ++__removed;
38032Speter    }
38032Speter    // We need to take __vr + 1 if __vr is outside bounds or we need to round up.
38032Speter    _Output = __vr + (__vr == __vm || __lastRemovedDigit >= 5);
38032Speter  }
38032Speter  const int32_t __exp = __e10 + __removed;
38032Speter
38032Speter  __floating_decimal_32 __fd;
38032Speter  __fd.__exponent = __exp;
38032Speter  __fd.__mantissa = _Output;
38032Speter  return __fd;
38032Speter}
38032Speter
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline to_chars_result _Large_integer_to_chars(char* const _First, char* const _Last,
38032Speter  const uint32_t _Mantissa2, const int32_t _Exponent2) {
38032Speter
38032Speter  // Print the integer _Mantissa2 * 2^_Exponent2 exactly.
38032Speter
64565Sgshapiro  // For nonzero integers, _Exponent2 >= -23. (The minimum value occurs when _Mantissa2 * 2^_Exponent2 is 1.
64565Sgshapiro  // In that case, _Mantissa2 is the implicit 1 bit followed by 23 zeros, so _Exponent2 is -23 to shift away
38032Speter  // the zeros.) The dense range of exactly representable integers has negative or zero exponents
38032Speter  // (as positive exponents make the range non-dense). For that dense range, Ryu will always be used:
38032Speter  // every digit is necessary to uniquely identify the value, so Ryu must print them all.
38032Speter
38032Speter  // Positive exponents are the non-dense range of exactly representable integers.
71348Sgshapiro  // This contains all of the values for which Ryu can't be used (and a few Ryu-friendly values).
64565Sgshapiro
38032Speter  // Performance note: Long division appears to be faster than losslessly widening float to double and calling
38032Speter  // __d2fixed_buffered_n(). If __f2fixed_buffered_n() is implemented, it might be faster than long division.
38032Speter
64565Sgshapiro  _LIBCPP_ASSERT_INTERNAL(_Exponent2 > 0, "");
64565Sgshapiro  _LIBCPP_ASSERT_INTERNAL(_Exponent2 <= 104, ""); // because __ieeeExponent <= 254
64565Sgshapiro
64565Sgshapiro  // Manually represent _Mantissa2 * 2^_Exponent2 as a large integer. _Mantissa2 is always 24 bits
64565Sgshapiro  // (due to the implicit bit), while _Exponent2 indicates a shift of at most 104 bits.
64565Sgshapiro  // 24 + 104 equals 128 equals 4 * 32, so we need exactly 4 32-bit elements.
64565Sgshapiro  // We use a little-endian representation, visualized like this:
64565Sgshapiro
38032Speter  // << left shift <<
38032Speter  // most significant
64565Sgshapiro  // _Data[3] _Data[2] _Data[1] _Data[0]
64565Sgshapiro  //                   least significant
38032Speter  //                   >> right shift >>
38032Speter
38032Speter  constexpr uint32_t _Data_size = 4;
38032Speter  uint32_t _Data[_Data_size]{};
38032Speter
38032Speter  // _Maxidx is the index of the most significant nonzero element.
38032Speter  uint32_t _Maxidx = ((24 + static_cast<uint32_t>(_Exponent2) + 31) / 32) - 1;
38032Speter  _LIBCPP_ASSERT_INTERNAL(_Maxidx < _Data_size, "");
64565Sgshapiro
38032Speter  const uint32_t _Bit_shift = static_cast<uint32_t>(_Exponent2) % 32;
38032Speter  if (_Bit_shift <= 8) { // _Mantissa2's 24 bits don't cross an element boundary
38032Speter    _Data[_Maxidx] = _Mantissa2 << _Bit_shift;
38032Speter  } else { // _Mantissa2's 24 bits cross an element boundary
38032Speter    _Data[_Maxidx - 1] = _Mantissa2 << _Bit_shift;
38032Speter    _Data[_Maxidx] = _Mantissa2 >> (32 - _Bit_shift);
38032Speter  }
38032Speter
38032Speter  // If Ryu hasn't determined the total output length, we need to buffer the digits generated from right to left
38032Speter  // by long division. The largest possible float is: 340'282346638'528859811'704183484'516925440
38032Speter  uint32_t _Blocks[4];
38032Speter  int32_t _Filled_blocks = 0;
38032Speter  // From left to right, we're going to print:
38032Speter  // _Data[0] will be [1, 10] digits.
38032Speter  // Then if _Filled_blocks > 0:
38032Speter  // _Blocks[_Filled_blocks - 1], ..., _Blocks[0] will be 0-filled 9-digit blocks.
38032Speter
38032Speter  if (_Maxidx != 0) { // If the integer is actually large, perform long division.
38032Speter                      // Otherwise, skip to printing _Data[0].
38032Speter    for (;;) {
38032Speter      // Loop invariant: _Maxidx != 0 (i.e. the integer is actually large)
38032Speter
38032Speter      const uint32_t _Most_significant_elem = _Data[_Maxidx];
38032Speter      const uint32_t _Initial_remainder = _Most_significant_elem % 1000000000;
38032Speter      const uint32_t _Initial_quotient = _Most_significant_elem / 1000000000;
38032Speter      _Data[_Maxidx] = _Initial_quotient;
38032Speter      uint64_t _Remainder = _Initial_remainder;
64565Sgshapiro
38032Speter      // Process less significant elements.
64565Sgshapiro      uint32_t _Idx = _Maxidx;
38032Speter      do {
38032Speter        --_Idx; // Initially, _Remainder is at most 10^9 - 1.
38032Speter
38032Speter        // Now, _Remainder is at most (10^9 - 1) * 2^32 + 2^32 - 1, simplified to 10^9 * 2^32 - 1.
38032Speter        _Remainder = (_Remainder << 32) | _Data[_Idx];
38032Speter
38032Speter        // floor((10^9 * 2^32 - 1) / 10^9) == 2^32 - 1, so uint32_t _Quotient is lossless.
38032Speter        const uint32_t _Quotient = static_cast<uint32_t>(__div1e9(_Remainder));
38032Speter
38032Speter        // _Remainder is at most 10^9 - 1 again.
38032Speter        // For uint32_t truncation, see the __mod1e9() comment in d2s_intrinsics.h.
38032Speter        _Remainder = static_cast<uint32_t>(_Remainder) - 1000000000u * _Quotient;
38032Speter
38032Speter        _Data[_Idx] = _Quotient;
38032Speter      } while (_Idx != 0);
38032Speter
64565Sgshapiro      // Store a 0-filled 9-digit block.
64565Sgshapiro      _Blocks[_Filled_blocks++] = static_cast<uint32_t>(_Remainder);
64565Sgshapiro
64565Sgshapiro      if (_Initial_quotient == 0) { // Is the large integer shrinking?
64565Sgshapiro        --_Maxidx; // log2(10^9) is 29.9, so we can't shrink by more than one element.
64565Sgshapiro        if (_Maxidx == 0) {
64565Sgshapiro          break; // We've finished long division. Now we need to print _Data[0].
64565Sgshapiro        }
64565Sgshapiro      }
64565Sgshapiro    }
64565Sgshapiro  }
71348Sgshapiro
64565Sgshapiro  _LIBCPP_ASSERT_INTERNAL(_Data[0] != 0, "");
64565Sgshapiro  for (uint32_t _Idx = 1; _Idx < _Data_size; ++_Idx) {
64565Sgshapiro    _LIBCPP_ASSERT_INTERNAL(_Data[_Idx] == 0, "");
64565Sgshapiro  }
64565Sgshapiro
64565Sgshapiro  const uint32_t _Data_olength = _Data[0] >= 1000000000 ? 10 : __decimalLength9(_Data[0]);
64565Sgshapiro  const uint32_t _Total_fixed_length = _Data_olength + 9 * _Filled_blocks;
64565Sgshapiro
64565Sgshapiro  if (_Last - _First < static_cast<ptrdiff_t>(_Total_fixed_length)) {
64565Sgshapiro    return { _Last, errc::value_too_large };
64565Sgshapiro  }
38032Speter
38032Speter  char* _Result = _First;
38032Speter
38032Speter  // Print _Data[0]. While it's up to 10 digits,
38032Speter  // which is more than Ryu generates, the code below can handle this.
38032Speter  __append_n_digits(_Data_olength, _Data[0], _Result);
38032Speter  _Result += _Data_olength;
38032Speter
38032Speter  // Print 0-filled 9-digit blocks.
38032Speter  for (int32_t _Idx = _Filled_blocks - 1; _Idx >= 0; --_Idx) {
38032Speter    __append_nine_digits(_Blocks[_Idx], _Result);
38032Speter    _Result += 9;
38032Speter  }
38032Speter
38032Speter  return { _Result, errc{} };
38032Speter}
38032Speter
38032Speter[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline to_chars_result __to_chars(char* const _First, char* const _Last, const __floating_decimal_32 __v,
38032Speter  chars_format _Fmt, const uint32_t __ieeeMantissa, const uint32_t __ieeeExponent) {
38032Speter  // Step 5: Print the decimal representation.
38032Speter  uint32_t _Output = __v.__mantissa;
38032Speter  int32_t _Ryu_exponent = __v.__exponent;
38032Speter  const uint32_t __olength = __decimalLength9(_Output);
38032Speter  int32_t _Scientific_exponent = _Ryu_exponent + static_cast<int32_t>(__olength) - 1;
38032Speter
38032Speter  if (_Fmt == chars_format{}) {
38032Speter    int32_t _Lower;
38032Speter    int32_t _Upper;
38032Speter
38032Speter    if (__olength == 1) {
38032Speter      // Value | Fixed   | Scientific
38032Speter      // 1e-3  | "0.001" | "1e-03"
38032Speter      // 1e4   | "10000" | "1e+04"
38032Speter      _Lower = -3;
38032Speter      _Upper = 4;
38032Speter    } else {
38032Speter      // Value   | Fixed       | Scientific
38032Speter      // 1234e-7 | "0.0001234" | "1.234e-04"
38032Speter      // 1234e5  | "123400000" | "1.234e+08"
38032Speter      _Lower = -static_cast<int32_t>(__olength + 3);
38032Speter      _Upper = 5;
38032Speter    }
38032Speter
38032Speter    if (_Lower <= _Ryu_exponent && _Ryu_exponent <= _Upper) {
38032Speter      _Fmt = chars_format::fixed;
38032Speter    } else {
38032Speter      _Fmt = chars_format::scientific;
38032Speter    }
64565Sgshapiro  } else if (_Fmt == chars_format::general) {
38032Speter    // C11 7.21.6.1 "The fprintf function"/8:
38032Speter    // "Let P equal [...] 6 if the precision is omitted [...].
38032Speter    // Then, if a conversion with style E would have an exponent of X:
38032Speter    // - if P > X >= -4, the conversion is with style f [...].
38032Speter    // - otherwise, the conversion is with style e [...]."
38032Speter    if (-4 <= _Scientific_exponent && _Scientific_exponent < 6) {
38032Speter      _Fmt = chars_format::fixed;
38032Speter    } else {
38032Speter      _Fmt = chars_format::scientific;
38032Speter    }
38032Speter  }
38032Speter
38032Speter  if (_Fmt == chars_format::fixed) {
38032Speter    // Example: _Output == 1729, __olength == 4
38032Speter
38032Speter    // _Ryu_exponent | Printed  | _Whole_digits | _Total_fixed_length  | Notes
71348Sgshapiro    // --------------|----------|---------------|----------------------|---------------------------------------
38032Speter    //             2 | 172900   |  6            | _Whole_digits        | Ryu can't be used for printing
38032Speter    //             1 | 17290    |  5            | (sometimes adjusted) | when the trimmed digits are nonzero.
38032Speter    // --------------|----------|---------------|----------------------|---------------------------------------
38032Speter    //             0 | 1729     |  4            | _Whole_digits        | Unified length cases.
38032Speter    // --------------|----------|---------------|----------------------|---------------------------------------
38032Speter    //            -1 | 172.9    |  3            | __olength + 1        | This case can't happen for
38032Speter    //            -2 | 17.29    |  2            |                      | __olength == 1, but no additional
38032Speter    //            -3 | 1.729    |  1            |                      | code is needed to avoid it.
38032Speter    // --------------|----------|---------------|----------------------|---------------------------------------
38032Speter    //            -4 | 0.1729   |  0            | 2 - _Ryu_exponent    | C11 7.21.6.1 "The fprintf function"/8:
38032Speter    //            -5 | 0.01729  | -1            |                      | "If a decimal-point character appears,
38032Speter    //            -6 | 0.001729 | -2            |                      | at least one digit appears before it."
38032Speter
38032Speter    const int32_t _Whole_digits = static_cast<int32_t>(__olength) + _Ryu_exponent;
40498Sbde
40498Sbde    uint32_t _Total_fixed_length;
40498Sbde    if (_Ryu_exponent >= 0) { // cases "172900" and "1729"
38032Speter      _Total_fixed_length = static_cast<uint32_t>(_Whole_digits);
38032Speter      if (_Output == 1) {
38032Speter        // Rounding can affect the number of digits.
38032Speter        // For example, 1e11f is exactly "99999997952" which is 11 digits instead of 12.
38032Speter        // We can use a lookup table to detect this and adjust the total length.
38032Speter        static constexpr uint8_t _Adjustment[39] = {
64565Sgshapiro          0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1 };
38032Speter        _Total_fixed_length -= _Adjustment[_Ryu_exponent];
38032Speter        // _Whole_digits doesn't need to be adjusted because these cases won't refer to it later.
38032Speter      }
38032Speter    } else if (_Whole_digits > 0) { // case "17.29"
38032Speter      _Total_fixed_length = __olength + 1;
38032Speter    } else { // case "0.001729"
38032Speter      _Total_fixed_length = static_cast<uint32_t>(2 - _Ryu_exponent);
38032Speter    }
38032Speter
38032Speter    if (_Last - _First < static_cast<ptrdiff_t>(_Total_fixed_length)) {
38032Speter      return { _Last, errc::value_too_large };
38032Speter    }
38032Speter
38032Speter    char* _Mid;
38032Speter    if (_Ryu_exponent > 0) { // case "172900"
38032Speter      bool _Can_use_ryu;
38032Speter
38032Speter      if (_Ryu_exponent > 10) { // 10^10 is the largest power of 10 that's exactly representable as a float.
64565Sgshapiro        _Can_use_ryu = false;
38032Speter      } else {
38032Speter        // Ryu generated X: __v.__mantissa * 10^_Ryu_exponent
38032Speter        // __v.__mantissa == 2^_Trailing_zero_bits * (__v.__mantissa >> _Trailing_zero_bits)
38032Speter        // 10^_Ryu_exponent == 2^_Ryu_exponent * 5^_Ryu_exponent
38032Speter
38032Speter        // _Trailing_zero_bits is [0, 29] (aside: because 2^29 is the largest power of 2
38032Speter        // with 9 decimal digits, which is float's round-trip limit.)
38032Speter        // _Ryu_exponent is [1, 10].
38032Speter        // Normalization adds [2, 23] (aside: at least 2 because the pre-normalized mantissa is at least 5).
38032Speter        // This adds up to [3, 62], which is well below float's maximum binary exponent 127.
38032Speter
38032Speter        // Therefore, we just need to consider (__v.__mantissa >> _Trailing_zero_bits) * 5^_Ryu_exponent.
38032Speter
38032Speter        // If that product would exceed 24 bits, then X can't be exactly represented as a float.
38032Speter        // (That's not a problem for round-tripping, because X is close enough to the original float,
38032Speter        // but X isn't mathematically equal to the original float.) This requires a high-precision fallback.
38032Speter
38032Speter        // If the product is 24 bits or smaller, then X can be exactly represented as a float (and we don't
38032Speter        // need to re-synthesize it; the original float must have been X, because Ryu wouldn't produce the
38032Speter        // same output for two different floats X and Y). This allows Ryu's output to be used (zero-filled).
38032Speter
38032Speter        // (2^24 - 1) / 5^0 (for indexing), (2^24 - 1) / 5^1, ..., (2^24 - 1) / 5^10
38032Speter        static constexpr uint32_t _Max_shifted_mantissa[11] = {
38032Speter          16777215, 3355443, 671088, 134217, 26843, 5368, 1073, 214, 42, 8, 1 };
38032Speter
38032Speter        unsigned long _Trailing_zero_bits;
38032Speter        (void) _BitScanForward(&_Trailing_zero_bits, __v.__mantissa); // __v.__mantissa is guaranteed nonzero
38032Speter        const uint32_t _Shifted_mantissa = __v.__mantissa >> _Trailing_zero_bits;
38032Speter        _Can_use_ryu = _Shifted_mantissa <= _Max_shifted_mantissa[_Ryu_exponent];
38032Speter      }
38032Speter
38032Speter      if (!_Can_use_ryu) {
38032Speter        const uint32_t _Mantissa2 = __ieeeMantissa | (1u << __FLOAT_MANTISSA_BITS); // restore implicit bit
38032Speter        const int32_t _Exponent2 = static_cast<int32_t>(__ieeeExponent)
38032Speter          - __FLOAT_BIAS - __FLOAT_MANTISSA_BITS; // bias and normalization
38032Speter
38032Speter        // Performance note: We've already called Ryu, so this will redundantly perform buffering and bounds checking.
38032Speter        return _Large_integer_to_chars(_First, _Last, _Mantissa2, _Exponent2);
38032Speter      }
38032Speter
38032Speter      // _Can_use_ryu
38032Speter      // Print the decimal digits, left-aligned within [_First, _First + _Total_fixed_length).
38032Speter      _Mid = _First + __olength;
38032Speter    } else { // cases "1729", "17.29", and "0.001729"
38032Speter      // Print the decimal digits, right-aligned within [_First, _First + _Total_fixed_length).
38032Speter      _Mid = _First + _Total_fixed_length;
38032Speter    }
38032Speter
42580Speter    while (_Output >= 10000) {
38032Speter#ifdef __clang__ // TRANSITION, LLVM-38217
38032Speter      const uint32_t __c = _Output - 10000 * (_Output / 10000);
38032Speter#else
38032Speter      const uint32_t __c = _Output % 10000;
38032Speter#endif
38032Speter      _Output /= 10000;
38032Speter      const uint32_t __c0 = (__c % 100) << 1;
38032Speter      const uint32_t __c1 = (__c / 100) << 1;
38032Speter      std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c0, 2);
38032Speter      std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c1, 2);
38032Speter    }
38032Speter    if (_Output >= 100) {
38032Speter      const uint32_t __c = (_Output % 100) << 1;
64565Sgshapiro      _Output /= 100;
38032Speter      std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c, 2);
38032Speter    }
38032Speter    if (_Output >= 10) {
38032Speter      const uint32_t __c = _Output << 1;
38032Speter      std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c, 2);
38032Speter    } else {
38032Speter      *--_Mid = static_cast<char>('0' + _Output);
38032Speter    }
38032Speter
38032Speter    if (_Ryu_exponent > 0) { // case "172900" with _Can_use_ryu
64565Sgshapiro      // Performance note: it might be more efficient to do this immediately after setting _Mid.
38032Speter      std::memset(_First + __olength, '0', static_cast<size_t>(_Ryu_exponent));
38032Speter    } else if (_Ryu_exponent == 0) { // case "1729"
38032Speter      // Done!
38032Speter    } else if (_Whole_digits > 0) { // case "17.29"
38032Speter      // Performance note: moving digits might not be optimal.
38032Speter      std::memmove(_First, _First + 1, static_cast<size_t>(_Whole_digits));
38032Speter      _First[_Whole_digits] = '.';
38032Speter    } else { // case "0.001729"
38032Speter      // Performance note: a larger memset() followed by overwriting '.' might be more efficient.
64565Sgshapiro      _First[0] = '0';
38032Speter      _First[1] = '.';
38032Speter      std::memset(_First + 2, '0', static_cast<size_t>(-_Whole_digits));
38032Speter    }
64565Sgshapiro
38032Speter    return { _First + _Total_fixed_length, errc{} };
64565Sgshapiro  }
38032Speter
38032Speter  const uint32_t _Total_scientific_length =
38032Speter    __olength + (__olength > 1) + 4; // digits + possible decimal point + scientific exponent
38032Speter  if (_Last - _First < static_cast<ptrdiff_t>(_Total_scientific_length)) {
38032Speter    return { _Last, errc::value_too_large };
64565Sgshapiro  }
38032Speter  char* const __result = _First;
38032Speter
38032Speter  // Print the decimal digits.
38032Speter  uint32_t __i = 0;
38032Speter  while (_Output >= 10000) {
38032Speter#ifdef __clang__ // TRANSITION, LLVM-38217
64565Sgshapiro    const uint32_t __c = _Output - 10000 * (_Output / 10000);
64565Sgshapiro#else
38032Speter    const uint32_t __c = _Output % 10000;
38032Speter#endif
38032Speter    _Output /= 10000;
38032Speter    const uint32_t __c0 = (__c % 100) << 1;
38032Speter    const uint32_t __c1 = (__c / 100) << 1;
38032Speter    std::memcpy(__result + __olength - __i - 1, __DIGIT_TABLE + __c0, 2);
64565Sgshapiro    std::memcpy(__result + __olength - __i - 3, __DIGIT_TABLE + __c1, 2);
38032Speter    __i += 4;
38032Speter  }
38032Speter  if (_Output >= 100) {
38032Speter    const uint32_t __c = (_Output % 100) << 1;
38032Speter    _Output /= 100;
38032Speter    std::memcpy(__result + __olength - __i - 1, __DIGIT_TABLE + __c, 2);
38032Speter    __i += 2;
38032Speter  }
38032Speter  if (_Output >= 10) {
38032Speter    const uint32_t __c = _Output << 1;
38032Speter    // We can't use memcpy here: the decimal dot goes between these two digits.
64565Sgshapiro    __result[2] = __DIGIT_TABLE[__c + 1];
64565Sgshapiro    __result[0] = __DIGIT_TABLE[__c];
64565Sgshapiro  } else {
38032Speter    __result[0] = static_cast<char>('0' + _Output);
38032Speter  }
38032Speter
38032Speter  // Print decimal point if needed.
38032Speter  uint32_t __index;
38032Speter  if (__olength > 1) {
64565Sgshapiro    __result[1] = '.';
38032Speter    __index = __olength + 1;
38032Speter  } else {
64565Sgshapiro    __index = 1;
38032Speter  }
38032Speter
38032Speter  // Print the exponent.
38032Speter  __result[__index++] = 'e';
38032Speter  if (_Scientific_exponent < 0) {
38032Speter    __result[__index++] = '-';
38032Speter    _Scientific_exponent = -_Scientific_exponent;
38032Speter  } else {
38032Speter    __result[__index++] = '+';
38032Speter  }
38032Speter
38032Speter  std::memcpy(__result + __index, __DIGIT_TABLE + 2 * _Scientific_exponent, 2);
38032Speter  __index += 2;
38032Speter
38032Speter  return { _First + _Total_scientific_length, errc{} };
38032Speter}
38032Speter
38032Speter[[nodiscard]] to_chars_result __f2s_buffered_n(char* const _First, char* const _Last, const float __f,
38032Speter  const chars_format _Fmt) {
64565Sgshapiro
38032Speter  // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
38032Speter  const uint32_t __bits = __float_to_bits(__f);
38032Speter
38032Speter  // Case distinction; exit early for the easy cases.
64565Sgshapiro  if (__bits == 0) {
38032Speter    if (_Fmt == chars_format::scientific) {
38032Speter      if (_Last - _First < 5) {
38032Speter        return { _Last, errc::value_too_large };
38032Speter      }
38032Speter
38032Speter      std::memcpy(_First, "0e+00", 5);
38032Speter
38032Speter      return { _First + 5, errc{} };
38032Speter    }
64565Sgshapiro
38032Speter    // Print "0" for chars_format::fixed, chars_format::general, and chars_format{}.
38032Speter    if (_First == _Last) {
38032Speter      return { _Last, errc::value_too_large };
38032Speter    }
64565Sgshapiro
38032Speter    *_First = '0';
38032Speter
38032Speter    return { _First + 1, errc{} };
38032Speter  }
38032Speter
38032Speter  // Decode __bits into mantissa and exponent.
64565Sgshapiro  const uint32_t __ieeeMantissa = __bits & ((1u << __FLOAT_MANTISSA_BITS) - 1);
38032Speter  const uint32_t __ieeeExponent = __bits >> __FLOAT_MANTISSA_BITS;
38032Speter
38032Speter  // When _Fmt == chars_format::fixed and the floating-point number is a large integer,
64565Sgshapiro  // it's faster to skip Ryu and immediately print the integer exactly.
38032Speter  if (_Fmt == chars_format::fixed) {
38032Speter    const uint32_t _Mantissa2 = __ieeeMantissa | (1u << __FLOAT_MANTISSA_BITS); // restore implicit bit
42580Speter    const int32_t _Exponent2 = static_cast<int32_t>(__ieeeExponent)
38032Speter      - __FLOAT_BIAS - __FLOAT_MANTISSA_BITS; // bias and normalization
38032Speter
38032Speter    // Normal values are equal to _Mantissa2 * 2^_Exponent2.
38032Speter    // (Subnormals are different, but they'll be rejected by the _Exponent2 test here, so they can be ignored.)
38032Speter
38032Speter    if (_Exponent2 > 0) {
38032Speter      return _Large_integer_to_chars(_First, _Last, _Mantissa2, _Exponent2);
38032Speter    }
38032Speter  }
38032Speter
38032Speter  const __floating_decimal_32 __v = __f2d(__ieeeMantissa, __ieeeExponent);
38032Speter  return __to_chars(_First, _Last, __v, _Fmt, __ieeeMantissa, __ieeeExponent);
38032Speter}
64565Sgshapiro
38032Speter_LIBCPP_END_NAMESPACE_STD
38032Speter
38032Speter// clang-format on
64565Sgshapiro