1/* human.c -- print human readable file size
2
3   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4   2005, 2006 Free Software Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19
20/* Written by Paul Eggert and Larry McVoy.  */
21
22#include <config.h>
23
24#include "human.h"
25
26#include <locale.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "gettext.h"
32#define _(msgid) gettext (msgid)
33
34#include <argmatch.h>
35#include <error.h>
36#include <intprops.h>
37#include <xstrtol.h>
38
/* Upper bound on the byte length of a unit suffix such as "KiB": one
   power letter, an optional 'i', and an optional 'B'.  The separating
   space that human_space_before_unit can request is not included in
   this count.  */
#define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3
41
/* Suffix letters indexed by power of the base: entry 1 is the letter
   for base**1, entry 2 for base**2, and so on up to base**8.  Entry 0
   is only a placeholder, since no letter is printed for power 0.  The
   lowercase 'k' for kilo (powers of 1000) is a special case that is
   not stored in this table.  */
static const char power_letter[] =
{
  '\0',                 /* not used */
  'K', 'M', 'G', 'T',   /* kibi, mega/mebi, giga/gibi, tera/tebi */
  'P', 'E',             /* peta/pebi, exa/exbi */
  'Z', 'Y'              /* zetta or 2**70, yotta or 2**80 */
};
54
55
56/* If INEXACT_STYLE is not human_round_to_nearest, and if easily
57   possible, adjust VALUE according to the style.  */
58
59static long double
60adjust_value (int inexact_style, long double value)
61{
62  /* Do not use the floorl or ceill functions, as that would mean
63     checking for their presence and possibly linking with the
64     standard math library, which is a porting pain.  So leave the
65     value alone if it is too large to easily round.  */
66  if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
67    {
68      uintmax_t u = value;
69      value = u + (inexact_style == human_ceiling && u != value);
70    }
71
72  return value;
73}
74
75/* Group the digits of NUMBER according to the grouping rules of the
76   current locale.  NUMBER contains NUMBERLEN digits.  Modify the
77   bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
78   each byte inserted.  Return the starting address of the modified
79   number.
80
81   To group the digits, use GROUPING and THOUSANDS_SEP as in `struct
82   lconv' from <locale.h>.  */
83
84static char *
85group_number (char *number, size_t numberlen,
86	      char const *grouping, char const *thousands_sep)
87{
88  register char *d;
89  size_t grouplen = SIZE_MAX;
90  size_t thousands_seplen = strlen (thousands_sep);
91  size_t i = numberlen;
92
93  /* The maximum possible value for NUMBERLEN is the number of digits
94     in the square of the largest uintmax_t, so double the size needed.  */
95  char buf[2 * INT_STRLEN_BOUND (uintmax_t) + 1];
96
97  memcpy (buf, number, numberlen);
98  d = number + numberlen;
99
100  for (;;)
101    {
102      unsigned char g = *grouping;
103
104      if (g)
105	{
106	  grouplen = g < CHAR_MAX ? g : i;
107	  grouping++;
108	}
109
110      if (i < grouplen)
111	grouplen = i;
112
113      d -= grouplen;
114      i -= grouplen;
115      memcpy (d, buf + i, grouplen);
116      if (i == 0)
117	return d;
118
119      d -= thousands_seplen;
120      memcpy (d, thousands_sep, thousands_seplen);
121    }
122}
123
124/* Convert N to a human readable format in BUF, using the options OPTS.
125
126   N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
127   be nonnegative.
128
129   Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
130   must be positive.
131
132   Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
133   to determine whether to take the ceiling or floor of any result
134   that cannot be expressed exactly.
135
136   If (OPTS & human_group_digits), group the thousands digits
137   according to the locale, e.g., `1,000,000' in an American English
138   locale.
139
140   If (OPTS & human_autoscale), deduce the output block size
141   automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
142   output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
143   of 1000 otherwise.  For example, assuming powers of 1024, 8500
144   would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
145   so on.  Numbers smaller than the power aren't modified.
146   human_autoscale is normally used together with human_SI.
147
148   If (OPTS & human_space_before_unit), use a space to separate the
149   number from any suffix that is appended as described below.
150
151   If (OPTS & human_SI), append an SI prefix indicating which power is
152   being used.  If in addition (OPTS & human_B), append "B" (if base
153   1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
154   human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
155   power of 1024 or of 1000, depending on (OPTS &
156   human_base_1024).  */
157
158char *
159human_readable (uintmax_t n, char *buf, int opts,
160		uintmax_t from_block_size, uintmax_t to_block_size)
161{
162  int inexact_style =
163    opts & (human_round_to_nearest | human_floor | human_ceiling);
164  unsigned int base = opts & human_base_1024 ? 1024 : 1000;
165  uintmax_t amt;
166  int tenths;
167  int exponent = -1;
168  int exponent_max = sizeof power_letter - 1;
169  char *p;
170  char *psuffix;
171  char const *integerlim;
172
173  /* 0 means adjusted N == AMT.TENTHS;
174     1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
175     2 means adjusted N == AMT.TENTHS + 0.05;
176     3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
177  int rounding;
178
179  char const *decimal_point = ".";
180  size_t decimal_pointlen = 1;
181  char const *grouping = "";
182  char const *thousands_sep = "";
183  struct lconv const *l = localeconv ();
184  size_t pointlen = strlen (l->decimal_point);
185  if (0 < pointlen && pointlen <= MB_LEN_MAX)
186    {
187      decimal_point = l->decimal_point;
188      decimal_pointlen = pointlen;
189    }
190  grouping = l->grouping;
191  if (strlen (l->thousands_sep) <= MB_LEN_MAX)
192    thousands_sep = l->thousands_sep;
193
194  psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
195  p = psuffix;
196
197  /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
198     units.  If this can be done exactly with integer arithmetic, do
199     not use floating point operations.  */
200  if (to_block_size <= from_block_size)
201    {
202      if (from_block_size % to_block_size == 0)
203	{
204	  uintmax_t multiplier = from_block_size / to_block_size;
205	  amt = n * multiplier;
206	  if (amt / multiplier == n)
207	    {
208	      tenths = 0;
209	      rounding = 0;
210	      goto use_integer_arithmetic;
211	    }
212	}
213    }
214  else if (from_block_size != 0 && to_block_size % from_block_size == 0)
215    {
216      uintmax_t divisor = to_block_size / from_block_size;
217      uintmax_t r10 = (n % divisor) * 10;
218      uintmax_t r2 = (r10 % divisor) * 2;
219      amt = n / divisor;
220      tenths = r10 / divisor;
221      rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
222      goto use_integer_arithmetic;
223    }
224
225  {
226    /* Either the result cannot be computed easily using uintmax_t,
227       or from_block_size is zero.  Fall back on floating point.
228       FIXME: This can yield answers that are slightly off.  */
229
230    long double dto_block_size = to_block_size;
231    long double damt = n * (from_block_size / dto_block_size);
232    size_t buflen;
233    size_t nonintegerlen;
234
235    if (! (opts & human_autoscale))
236      {
237	sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
238	buflen = strlen (buf);
239	nonintegerlen = 0;
240      }
241    else
242      {
243	long double e = 1;
244	exponent = 0;
245
246	do
247	  {
248	    e *= base;
249	    exponent++;
250	  }
251	while (e * base <= damt && exponent < exponent_max);
252
253	damt /= e;
254
255	sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
256	buflen = strlen (buf);
257	nonintegerlen = decimal_pointlen + 1;
258
259	if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
260	    || ((opts & human_suppress_point_zero)
261		&& buf[buflen - 1] == '0'))
262	  {
263	    sprintf (buf, "%.0Lf",
264		     adjust_value (inexact_style, damt * 10) / 10);
265	    buflen = strlen (buf);
266	    nonintegerlen = 0;
267	  }
268      }
269
270    p = psuffix - buflen;
271    memmove (p, buf, buflen);
272    integerlim = p + buflen - nonintegerlen;
273  }
274  goto do_grouping;
275
276 use_integer_arithmetic:
277  {
278    /* The computation can be done exactly, with integer arithmetic.
279
280       Use power of BASE notation if requested and if adjusted AMT is
281       large enough.  */
282
283    if (opts & human_autoscale)
284      {
285	exponent = 0;
286
287	if (base <= amt)
288	  {
289	    do
290	      {
291		unsigned int r10 = (amt % base) * 10 + tenths;
292		unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
293		amt /= base;
294		tenths = r10 / base;
295		rounding = (r2 < base
296			    ? (r2 + rounding) != 0
297			    : 2 + (base < r2 + rounding));
298		exponent++;
299	      }
300	    while (base <= amt && exponent < exponent_max);
301
302	    if (amt < 10)
303	      {
304		if (inexact_style == human_round_to_nearest
305		    ? 2 < rounding + (tenths & 1)
306		    : inexact_style == human_ceiling && 0 < rounding)
307		  {
308		    tenths++;
309		    rounding = 0;
310
311		    if (tenths == 10)
312		      {
313			amt++;
314			tenths = 0;
315		      }
316		  }
317
318		if (amt < 10
319		    && (tenths || ! (opts & human_suppress_point_zero)))
320		  {
321		    *--p = '0' + tenths;
322		    p -= decimal_pointlen;
323		    memcpy (p, decimal_point, decimal_pointlen);
324		    tenths = rounding = 0;
325		  }
326	      }
327	  }
328      }
329
330    if (inexact_style == human_round_to_nearest
331	? 5 < tenths + (0 < rounding + (amt & 1))
332	: inexact_style == human_ceiling && 0 < tenths + rounding)
333      {
334	amt++;
335
336	if ((opts & human_autoscale)
337	    && amt == base && exponent < exponent_max)
338	  {
339	    exponent++;
340	    if (! (opts & human_suppress_point_zero))
341	      {
342		*--p = '0';
343		p -= decimal_pointlen;
344		memcpy (p, decimal_point, decimal_pointlen);
345	      }
346	    amt = 1;
347	  }
348      }
349
350    integerlim = p;
351
352    do
353      {
354	int digit = amt % 10;
355	*--p = digit + '0';
356      }
357    while ((amt /= 10) != 0);
358  }
359
360 do_grouping:
361  if (opts & human_group_digits)
362    p = group_number (p, integerlim - p, grouping, thousands_sep);
363
364  if (opts & human_SI)
365    {
366      if (exponent < 0)
367	{
368	  uintmax_t power;
369	  exponent = 0;
370	  for (power = 1; power < to_block_size; power *= base)
371	    if (++exponent == exponent_max)
372	      break;
373	}
374
375      if ((exponent | (opts & human_B)) && (opts & human_space_before_unit))
376	*psuffix++ = ' ';
377
378      if (exponent)
379	*psuffix++ = (! (opts & human_base_1024) && exponent == 1
380		      ? 'k'
381		      : power_letter[exponent]);
382
383      if (opts & human_B)
384	{
385	  if ((opts & human_base_1024) && exponent)
386	    *psuffix++ = 'i';
387	  *psuffix++ = 'B';
388	}
389    }
390
391  *psuffix = '\0';
392
393  return p;
394}
395
396
/* The usual block size for output.  It applies only if
   DEFAULT_BLOCK_SIZE has not already been defined.  This number may
   change in the future as disks get larger.  */
#if ! defined DEFAULT_BLOCK_SIZE
# define DEFAULT_BLOCK_SIZE 1024
#endif
402
403static char const *const block_size_args[] = { "human-readable", "si", 0 };
404static int const block_size_opts[] =
405  {
406    human_autoscale + human_SI + human_base_1024,
407    human_autoscale + human_SI
408  };
409
410static uintmax_t
411default_block_size (void)
412{
413  return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
414}
415
416static strtol_error
417humblock (char const *spec, uintmax_t *block_size, int *options)
418{
419  int i;
420  int opts = 0;
421
422  if (! spec
423      && ! (spec = getenv ("BLOCK_SIZE"))
424      && ! (spec = getenv ("BLOCKSIZE")))
425    *block_size = default_block_size ();
426  else
427    {
428      if (*spec == '\'')
429	{
430	  opts |= human_group_digits;
431	  spec++;
432	}
433
434      if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
435	{
436	  opts |= block_size_opts[i];
437	  *block_size = 1;
438	}
439      else
440	{
441	  char *ptr;
442	  strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
443				       "eEgGkKmMpPtTyYzZ0");
444	  if (e != LONGINT_OK)
445	    {
446	      *options = 0;
447	      return e;
448	    }
449	  for (; ! ('0' <= *spec && *spec <= '9'); spec++)
450	    if (spec == ptr)
451	      {
452		opts |= human_SI;
453		if (ptr[-1] == 'B')
454		  opts |= human_B;
455		if (ptr[-1] != 'B' || ptr[-2] == 'i')
456		  opts |= human_base_1024;
457		break;
458	      }
459	}
460    }
461
462  *options = opts;
463  return LONGINT_OK;
464}
465
466int
467human_options (char const *spec, bool report_errors, uintmax_t *block_size)
468{
469  int opts;
470  strtol_error e = humblock (spec, block_size, &opts);
471  if (*block_size == 0)
472    {
473      *block_size = default_block_size ();
474      e = LONGINT_INVALID;
475    }
476  if (e != LONGINT_OK && report_errors)
477    STRTOL_FATAL_ERROR (spec, _("block size"), e);
478  return opts;
479}
480