gnu-sort/lib/human.c

98038Sache/* human.c -- print human readable file size
98038Sache
133543Stjr   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
133543Stjr   Free Software Foundation, Inc.
98038Sache
98038Sache   This program is free software; you can redistribute it and/or modify
98038Sache   it under the terms of the GNU General Public License as published by
98038Sache   the Free Software Foundation; either version 2, or (at your option)
98038Sache   any later version.
98038Sache
98038Sache   This program is distributed in the hope that it will be useful,
98038Sache   but WITHOUT ANY WARRANTY; without even the implied warranty of
98038Sache   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
98038Sache   GNU General Public License for more details.
98038Sache
98038Sache   You should have received a copy of the GNU General Public License
98038Sache   along with this program; if not, write to the Free Software Foundation,
98038Sache   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
98038Sache
131447Stjr/* Written by Paul Eggert and Larry McVoy.  */
98038Sache
98038Sache#if HAVE_CONFIG_H
98038Sache# include <config.h>
98038Sache#endif
98038Sache
131447Stjr#include "human.h"
98038Sache
131447Stjr#ifndef SIZE_MAX
131447Stjr# define SIZE_MAX ((size_t) -1)
98038Sache#endif
131447Stjr#ifndef UINTMAX_MAX
131447Stjr# define UINTMAX_MAX ((uintmax_t) -1)
98038Sache#endif
98038Sache
131447Stjr#if HAVE_LOCALE_H && HAVE_LOCALECONV
131447Stjr# include <locale.h>
98038Sache#endif
98038Sache
131447Stjr#include <stdio.h>
131447Stjr#include <stdlib.h>
131447Stjr#include <string.h>
98038Sache
131447Stjr#include "gettext.h"
131447Stjr#define _(msgid) gettext (msgid)
98038Sache
98038Sache#include <argmatch.h>
98038Sache#include <error.h>
98038Sache#include <xstrtol.h>
98038Sache
131447Stjr/* The maximum length of a suffix like "KiB".  */
131447Stjr#define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
98038Sache
/* Suffix letter for each power of the base, indexed by exponent.
   Index 0 is a placeholder so that the exponent can be used directly
   as the index; the number of entries fixes the largest exponent
   that can be printed.  */
static const char power_letter[] =
{
  '\0',  /* exponent 0: no letter */
  'K',   /* exponent 1: kibi (kilo is printed as 'k' by the caller) */
  'M',   /* exponent 2: mega or mebi */
  'G',   /* exponent 3: giga or gibi */
  'T',   /* exponent 4: tera or tebi */
  'P',   /* exponent 5: peta or pebi */
  'E',   /* exponent 6: exa or exbi */
  'Z',   /* exponent 7: zetta or 2**70 */
  'Y'    /* exponent 8: yotta or 2**80 */
};
98038Sache
98038Sache
131447Stjr/* If INEXACT_STYLE is not human_round_to_nearest, and if easily
131447Stjr   possible, adjust VALUE according to the style.  */
98038Sache
131447Stjrstatic long double
131447Stjradjust_value (int inexact_style, long double value)
98038Sache{
131447Stjr  /* Do not use the floorl or ceill functions, as that would mean
131447Stjr     checking for their presence and possibly linking with the
131447Stjr     standard math library, which is a porting pain.  So leave the
131447Stjr     value alone if it is too large to easily round.  */
131447Stjr  if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
98038Sache    {
98038Sache      uintmax_t u = value;
98038Sache      value = u + (inexact_style == human_ceiling && u != value);
98038Sache    }
98038Sache
98038Sache  return value;
98038Sache}
98038Sache
/* Group the digits of NUMBER according to the grouping rules of the
   current locale.  NUMBER contains NUMBERLEN digits.  Modify the
   bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
   each byte inserted.  Return the starting address of the modified
   number; the grouped text always ends at NUMBER + NUMBERLEN.

   To group the digits, use GROUPING and THOUSANDS_SEP as in `struct
   lconv' from <locale.h>.

   The caller must supply enough writable bytes in front of NUMBER
   for the separators that get inserted.  If NUMBERLEN is larger than
   the internal scratch buffer, NUMBER is left ungrouped and returned
   unchanged.  */

static char *
group_number (char *number, size_t numberlen,
              char const *grouping, char const *thousands_sep)
{
  char *d;
  size_t grouplen = SIZE_MAX;
  size_t thousands_seplen = strlen (thousands_sep);
  size_t i = numberlen;

  /* The maximum expected value for NUMBERLEN is the number of digits
     in the square of the largest uintmax_t, so double the size of
     uintmax_t before converting to a bound.  302 / 1000 is a slight
     overestimate of log10 (2.0).  Add 1 for integer division
     truncation.  */
  char buf[2 * sizeof (uintmax_t) * CHAR_BIT * 302 / 1000 + 1];

  /* Never copy more digits than the scratch buffer can hold.  */
  if (sizeof buf < numberlen)
    return number;

  /* Work from a copy, because the grouped digits are written over
     the original ones.  */
  memcpy (buf, number, numberlen);
  d = number + numberlen;

  /* Emit groups from the least significant end towards the front.  */
  for (;;)
    {
      unsigned char g = *grouping;

      /* A zero element ends the list and repeats the previous group
         size; an element of CHAR_MAX or more puts all remaining
         digits in one group.  */
      if (g)
        {
          grouplen = g < CHAR_MAX ? g : i;
          grouping++;
        }

      if (i < grouplen)
        grouplen = i;

      d -= grouplen;
      i -= grouplen;
      memcpy (d, buf + i, grouplen);
      if (i == 0)
        return d;

      d -= thousands_seplen;
      memcpy (d, thousands_sep, thousands_seplen);
    }
}
98038Sache
131447Stjr/* Convert N to a human readable format in BUF, using the options OPTS.
98038Sache
98038Sache   N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
98038Sache   be nonnegative.
98038Sache
131447Stjr   Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
131447Stjr   must be positive.
98038Sache
131447Stjr   Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
131447Stjr   to determine whether to take the ceiling or floor of any result
131447Stjr   that cannot be expressed exactly.
98038Sache
131447Stjr   If (OPTS & human_group_digits), group the thousands digits
131447Stjr   according to the locale, e.g., `1,000,000' in an American English
131447Stjr   locale.
98038Sache
131447Stjr   If (OPTS & human_autoscale), deduce the output block size
131447Stjr   automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
131447Stjr   output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
131447Stjr   of 1000 otherwise.  For example, assuming powers of 1024, 8500
131447Stjr   would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
131447Stjr   so on.  Numbers smaller than the power aren't modified.
131447Stjr   human_autoscale is normally used together with human_SI.
131447Stjr
131447Stjr   If (OPTS & human_SI), append an SI prefix indicating which power is
131447Stjr   being used.  If in addition (OPTS & human_B), append "B" (if base
131447Stjr   1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
131447Stjr   human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
131447Stjr   power of 1024 or of 1000, depending on (OPTS &
131447Stjr   human_base_1024).  */
131447Stjr
/* Return a pointer to the first byte of the result.  The result is
   stored inside BUF and is NUL-terminated.  The suffix area starts at
   BUF + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX and
   the number is written right to left in front of it, so the
   terminating NUL can land as far as BUF[LONGEST_HUMAN_READABLE];
   BUF therefore needs at least LONGEST_HUMAN_READABLE + 1 bytes.  */
98038Sachechar *
131447Stjrhuman_readable (uintmax_t n, char *buf, int opts,
131447Stjr		uintmax_t from_block_size, uintmax_t to_block_size)
98038Sache{
131447Stjr  int inexact_style =
131447Stjr    opts & (human_round_to_nearest | human_floor | human_ceiling);
131447Stjr  unsigned int base = opts & human_base_1024 ? 1024 : 1000;
98038Sache  uintmax_t amt;
131447Stjr  int tenths;
  /* EXPONENT stays negative until a scaling path assigns it; the
     suffix code below uses that to tell whether the power still has
     to be derived from TO_BLOCK_SIZE.  */
131447Stjr  int exponent = -1;
131447Stjr  int exponent_max = sizeof power_letter - 1;
98038Sache  char *p;
131447Stjr  char *psuffix;
131447Stjr  char const *integerlim;
98038Sache
98038Sache  /* 0 means adjusted N == AMT.TENTHS;
98038Sache     1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
98038Sache     2 means adjusted N == AMT.TENTHS + 0.05;
98038Sache     3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
131447Stjr  int rounding;
98038Sache
  /* Defaults used when locale information is unavailable or is
     rejected below: a one-byte "." decimal point and no grouping.  */
131447Stjr  char const *decimal_point = ".";
131447Stjr  size_t decimal_pointlen = 1;
131447Stjr  char const *grouping = "";
131447Stjr  char const *thousands_sep = "";
131447Stjr#if HAVE_LOCALE_H && HAVE_LOCALECONV
131447Stjr  struct lconv const *l = localeconv ();
131447Stjr  size_t pointlen = strlen (l->decimal_point);
  /* Accept the locale strings only if they are at most MB_LEN_MAX
     bytes long; the decimal point must also be nonempty.  */
131447Stjr  if (0 < pointlen && pointlen <= MB_LEN_MAX)
98038Sache    {
131447Stjr      decimal_point = l->decimal_point;
131447Stjr      decimal_pointlen = pointlen;
98038Sache    }
131447Stjr  grouping = l->grouping;
131447Stjr  if (strlen (l->thousands_sep) <= MB_LEN_MAX)
131447Stjr    thousands_sep = l->thousands_sep;
131447Stjr#endif
131447Stjr
  /* Reserve the tail of BUF for the suffix.  P starts at the suffix
     and moves toward the front of BUF as characters are prepended.  */
131447Stjr  psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
131447Stjr  p = psuffix;
131447Stjr
131447Stjr  /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
131447Stjr     units.  If this can be done exactly with integer arithmetic, do
131447Stjr     not use floating point operations.  */
131447Stjr  if (to_block_size <= from_block_size)
98038Sache    {
131447Stjr      if (from_block_size % to_block_size == 0)
131447Stjr	{
131447Stjr	  uintmax_t multiplier = from_block_size / to_block_size;
131447Stjr	  amt = n * multiplier;
	  /* Dividing back detects wraparound in the multiplication;
	     on wraparound fall through to the floating point code.  */
131447Stjr	  if (amt / multiplier == n)
131447Stjr	    {
131447Stjr	      tenths = 0;
131447Stjr	      rounding = 0;
131447Stjr	      goto use_integer_arithmetic;
131447Stjr	    }
131447Stjr	}
98038Sache    }
131447Stjr  else if (from_block_size != 0 && to_block_size % from_block_size == 0)
131447Stjr    {
      /* Exact division: R10 yields the first fractional digit and R2
	 compares what is left with half a tenth, which gives the
	 ROUNDING class described above.  */
131447Stjr      uintmax_t divisor = to_block_size / from_block_size;
131447Stjr      uintmax_t r10 = (n % divisor) * 10;
131447Stjr      uintmax_t r2 = (r10 % divisor) * 2;
131447Stjr      amt = n / divisor;
131447Stjr      tenths = r10 / divisor;
131447Stjr      rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
131447Stjr      goto use_integer_arithmetic;
131447Stjr    }
98038Sache
131447Stjr  {
131447Stjr    /* Either the result cannot be computed easily using uintmax_t,
131447Stjr       or from_block_size is zero.  Fall back on floating point.
131447Stjr       FIXME: This can yield answers that are slightly off.  */
98038Sache
131447Stjr    long double dto_block_size = to_block_size;
131447Stjr    long double damt = n * (from_block_size / dto_block_size);
131447Stjr    size_t buflen;
131447Stjr    size_t nonintegerlen;
98038Sache
    /* The number is first formatted at the start of BUF and later
       moved so that it ends right before the suffix area.  */
131447Stjr    if (! (opts & human_autoscale))
131447Stjr      {
131447Stjr	sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
131447Stjr	buflen = strlen (buf);
131447Stjr	nonintegerlen = 0;
131447Stjr      }
131447Stjr    else
131447Stjr      {
131447Stjr	long double e = 1;
131447Stjr	exponent = 0;
98038Sache
	/* Pick the power of BASE to divide by.  The loop body runs
	   at least once, so EXPONENT is at least 1 on this path.  */
131447Stjr	do
131447Stjr	  {
131447Stjr	    e *= base;
131447Stjr	    exponent++;
131447Stjr	  }
131447Stjr	while (e * base <= damt && exponent < exponent_max);
131447Stjr
131447Stjr	damt /= e;
131447Stjr
131447Stjr	sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
131447Stjr	buflen = strlen (buf);
131447Stjr	nonintegerlen = decimal_pointlen + 1;
131447Stjr
	/* Reformat without a fractional digit when the text is
	   longer than one leading character (two for base 1000)
	   plus the fraction, or when a trailing zero fraction is
	   to be suppressed.  */
131447Stjr	if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
131447Stjr	    || ((opts & human_suppress_point_zero)
131447Stjr		&& buf[buflen - 1] == '0'))
131447Stjr	  {
131447Stjr	    sprintf (buf, "%.0Lf",
131447Stjr		     adjust_value (inexact_style, damt * 10) / 10);
131447Stjr	    buflen = strlen (buf);
131447Stjr	    nonintegerlen = 0;
131447Stjr	  }
131447Stjr      }
131447Stjr
    /* INTEGERLIM marks the end of the integer digits, which is the
       part that digit grouping operates on.  */
131447Stjr    p = psuffix - buflen;
131447Stjr    memmove (p, buf, buflen);
131447Stjr    integerlim = p + buflen - nonintegerlen;
131447Stjr  }
131447Stjr  goto do_grouping;
131447Stjr
131447Stjr use_integer_arithmetic:
98038Sache  {
131447Stjr    /* The computation can be done exactly, with integer arithmetic.
131447Stjr
131447Stjr       Use power of BASE notation if requested and if adjusted AMT is
131447Stjr       large enough.  */
131447Stjr
131447Stjr    if (opts & human_autoscale)
98038Sache      {
131447Stjr	exponent = 0;
98038Sache
131447Stjr	if (base <= amt)
98038Sache	  {
	    /* Divide AMT by BASE repeatedly, carrying the discarded
	       remainder into TENTHS and ROUNDING at each step.  */
98038Sache	    do
98038Sache	      {
133543Stjr		unsigned int r10 = (amt % base) * 10 + tenths;
133543Stjr		unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
131447Stjr		amt /= base;
131447Stjr		tenths = r10 / base;
131447Stjr		rounding = (r2 < base
131447Stjr			    ? (r2 + rounding) != 0
131447Stjr			    : 2 + (base < r2 + rounding));
131447Stjr		exponent++;
98038Sache	      }
131447Stjr	    while (base <= amt && exponent < exponent_max);
98038Sache
	    /* A single-digit integer part is printed with one
	       fractional digit, so round at the tenths place.  */
131447Stjr	    if (amt < 10)
131447Stjr	      {
131447Stjr		if (inexact_style == human_round_to_nearest
131447Stjr		    ? 2 < rounding + (tenths & 1)
131447Stjr		    : inexact_style == human_ceiling && 0 < rounding)
131447Stjr		  {
131447Stjr		    tenths++;
131447Stjr		    rounding = 0;
98038Sache
131447Stjr		    if (tenths == 10)
131447Stjr		      {
131447Stjr			amt++;
131447Stjr			tenths = 0;
131447Stjr		      }
131447Stjr		  }
131447Stjr
		/* Emit the fraction unless the carry pushed AMT to
		   10, or the digit is zero and suppression of a
		   zero fraction was requested.  */
131447Stjr		if (amt < 10
131447Stjr		    && (tenths || ! (opts & human_suppress_point_zero)))
131447Stjr		  {
131447Stjr		    *--p = '0' + tenths;
131447Stjr		    p -= decimal_pointlen;
131447Stjr		    memcpy (p, decimal_point, decimal_pointlen);
131447Stjr		    tenths = rounding = 0;
131447Stjr		  }
131447Stjr	      }
98038Sache	  }
131447Stjr      }
98038Sache
    /* Round the integer part using whatever fraction is still
       pending in TENTHS and ROUNDING.  */
131447Stjr    if (inexact_style == human_round_to_nearest
131447Stjr	? 5 < tenths + (0 < rounding + (amt & 1))
131447Stjr	: inexact_style == human_ceiling && 0 < tenths + rounding)
131447Stjr      {
131447Stjr	amt++;
131447Stjr
	/* If rounding brought AMT up to BASE, step to the next
	   power and print 1 instead, followed by a zero fraction
	   unless that is suppressed.  */
131447Stjr	if ((opts & human_autoscale)
131447Stjr	    && amt == base && exponent < exponent_max)
131447Stjr	  {
131447Stjr	    exponent++;
131447Stjr	    if (! (opts & human_suppress_point_zero))
131447Stjr	      {
131447Stjr		*--p = '0';
131447Stjr		p -= decimal_pointlen;
131447Stjr		memcpy (p, decimal_point, decimal_pointlen);
131447Stjr	      }
131447Stjr	    amt = 1;
131447Stjr	  }
98038Sache      }
131447Stjr
131447Stjr    integerlim = p;
131447Stjr
    /* Prepend the decimal digits of AMT, least significant first.  */
131447Stjr    do
131447Stjr      {
131447Stjr	int digit = amt % 10;
131447Stjr	*--p = digit + '0';
131447Stjr      }
131447Stjr    while ((amt /= 10) != 0);
98038Sache  }
98038Sache
131447Stjr do_grouping:
  /* Only the integer digits between P and INTEGERLIM are grouped;
     group_number returns the new start of the number.  */
131447Stjr  if (opts & human_group_digits)
131447Stjr    p = group_number (p, integerlim - p, grouping, thousands_sep);
98038Sache
131447Stjr  if (opts & human_SI)
98038Sache    {
      /* No scaling path set EXPONENT, so count how many times BASE
	 must be multiplied to reach TO_BLOCK_SIZE, stopping at
	 EXPONENT_MAX.  */
131447Stjr      if (exponent < 0)
98038Sache	{
131447Stjr	  uintmax_t power;
131447Stjr	  exponent = 0;
131447Stjr	  for (power = 1; power < to_block_size; power *= base)
131447Stjr	    if (++exponent == exponent_max)
131447Stjr	      break;
98038Sache	}
98038Sache
      /* Base 1000 with exponent 1 uses a lower case k; every other
	 nonzero exponent takes its letter from power_letter.  */
131447Stjr      if (exponent)
131447Stjr	*psuffix++ = (! (opts & human_base_1024) && exponent == 1
131447Stjr		      ? 'k'
131447Stjr		      : power_letter[exponent]);
98038Sache
131447Stjr      if (opts & human_B)
98038Sache	{
131447Stjr	  if ((opts & human_base_1024) && exponent)
131447Stjr	    *psuffix++ = 'i';
131447Stjr	  *psuffix++ = 'B';
98038Sache	}
98038Sache    }
98038Sache
131447Stjr  *psuffix = '\0';
98038Sache
  /* P points at the first character of the finished string.  */
98038Sache  return p;
98038Sache}
98038Sache
98038Sache
98038Sache/* The default block size used for output.  This number may change in
98038Sache   the future as disks get larger.  */
98038Sache#ifndef DEFAULT_BLOCK_SIZE
98038Sache# define DEFAULT_BLOCK_SIZE 1024
98038Sache#endif
98038Sache
98038Sachestatic char const *const block_size_args[] = { "human-readable", "si", 0 };
131447Stjrstatic int const block_size_opts[] =
131447Stjr  {
131447Stjr    human_autoscale + human_SI + human_base_1024,
131447Stjr    human_autoscale + human_SI
131447Stjr  };
98038Sache
131447Stjrstatic uintmax_t
98038Sachedefault_block_size (void)
98038Sache{
98038Sache  return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
98038Sache}
98038Sache
98038Sachestatic strtol_error
131447Stjrhumblock (char const *spec, uintmax_t *block_size, int *options)
98038Sache{
98038Sache  int i;
131447Stjr  int opts = 0;
98038Sache
133543Stjr  if (! spec
133543Stjr      && ! (spec = getenv ("BLOCK_SIZE"))
133543Stjr      && ! (spec = getenv ("BLOCKSIZE")))
98038Sache    *block_size = default_block_size ();
98038Sache  else
98038Sache    {
131447Stjr      if (*spec == '\'')
131447Stjr	{
131447Stjr	  opts |= human_group_digits;
131447Stjr	  spec++;
131447Stjr	}
131447Stjr
131447Stjr      if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
131447Stjr	{
131447Stjr	  opts |= block_size_opts[i];
131447Stjr	  *block_size = 1;
131447Stjr	}
131447Stjr      else
131447Stjr	{
131447Stjr	  char *ptr;
131447Stjr	  strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
131447Stjr				       "eEgGkKmMpPtTyYzZ0");
131447Stjr	  if (e != LONGINT_OK)
131447Stjr	    return e;
131447Stjr	  for (; ! ('0' <= *spec && *spec <= '9'); spec++)
131447Stjr	    if (spec == ptr)
131447Stjr	      {
131447Stjr		opts |= human_SI;
131447Stjr		if (ptr[-1] == 'B')
131447Stjr		  opts |= human_B;
131447Stjr		if (ptr[-1] != 'B' || ptr[-2] == 'i')
131447Stjr		  opts |= human_base_1024;
131447Stjr		break;
131447Stjr	      }
131447Stjr	}
98038Sache    }
98038Sache
131447Stjr  *options = opts;
98038Sache  return LONGINT_OK;
98038Sache}
98038Sache
131447Stjrint
131447Stjrhuman_options (char const *spec, bool report_errors, uintmax_t *block_size)
98038Sache{
131447Stjr  int opts;
131447Stjr  strtol_error e = humblock (spec, block_size, &opts);
98038Sache  if (*block_size == 0)
98038Sache    {
98038Sache      *block_size = default_block_size ();
98038Sache      e = LONGINT_INVALID;
98038Sache    }
98038Sache  if (e != LONGINT_OK && report_errors)
98038Sache    STRTOL_FATAL_ERROR (spec, _("block size"), e);
131447Stjr  return opts;
98038Sache}