1/* human.c -- print human readable file size
2
3   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4   2005, 2006, 2007 Free Software Foundation, Inc.
5
6   This program is free software: you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19/* Written by Paul Eggert and Larry McVoy.  */
20
21#include <config.h>
22
23#include "human.h"
24
25#include <locale.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include <argmatch.h>
31#include <error.h>
32#include <intprops.h>
33
/* Upper bound on the number of bytes in a unit suffix such as "KiB":
   one power letter, an optional 'i' and an optional 'B'.  Any space
   that separates the number from the suffix is not counted.  */
#define HUMAN_READABLE_SUFFIX_LENGTH_MAX 3

/* Letter that names each power of the base, indexed by exponent.
   Slot 0 is a placeholder because exponent 0 takes no letter.  The
   number of entries also determines the largest exponent that
   human_readable will use.  */
static char const power_letter[] =
{
  '\0',  /* exponent 0: never emitted */
  'K',   /* kibi; kilo is emitted as lower-case 'k' by the caller */
  'M',   /* mega or mebi */
  'G',   /* giga or gibi */
  'T',   /* tera or tebi */
  'P',   /* peta or pebi */
  'E',   /* exa or exbi */
  'Z',   /* zetta or 2**70 */
  'Y'    /* yotta or 2**80 */
};
49
50
51/* If INEXACT_STYLE is not human_round_to_nearest, and if easily
52   possible, adjust VALUE according to the style.  */
53
54static long double
55adjust_value (int inexact_style, long double value)
56{
57  /* Do not use the floorl or ceill functions, as that would mean
58     checking for their presence and possibly linking with the
59     standard math library, which is a porting pain.  So leave the
60     value alone if it is too large to easily round.  */
61  if (inexact_style != human_round_to_nearest && value < UINTMAX_MAX)
62    {
63      uintmax_t u = value;
64      value = u + (inexact_style == human_ceiling && u != value);
65    }
66
67  return value;
68}
69
70/* Group the digits of NUMBER according to the grouping rules of the
71   current locale.  NUMBER contains NUMBERLEN digits.  Modify the
72   bytes pointed to by NUMBER in place, subtracting 1 from NUMBER for
73   each byte inserted.  Return the starting address of the modified
74   number.
75
76   To group the digits, use GROUPING and THOUSANDS_SEP as in `struct
77   lconv' from <locale.h>.  */
78
79static char *
80group_number (char *number, size_t numberlen,
81	      char const *grouping, char const *thousands_sep)
82{
83  register char *d;
84  size_t grouplen = SIZE_MAX;
85  size_t thousands_seplen = strlen (thousands_sep);
86  size_t i = numberlen;
87
88  /* The maximum possible value for NUMBERLEN is the number of digits
89     in the square of the largest uintmax_t, so double the size needed.  */
90  char buf[2 * INT_STRLEN_BOUND (uintmax_t) + 1];
91
92  memcpy (buf, number, numberlen);
93  d = number + numberlen;
94
95  for (;;)
96    {
97      unsigned char g = *grouping;
98
99      if (g)
100	{
101	  grouplen = g < CHAR_MAX ? g : i;
102	  grouping++;
103	}
104
105      if (i < grouplen)
106	grouplen = i;
107
108      d -= grouplen;
109      i -= grouplen;
110      memcpy (d, buf + i, grouplen);
111      if (i == 0)
112	return d;
113
114      d -= thousands_seplen;
115      memcpy (d, thousands_sep, thousands_seplen);
116    }
117}
118
119/* Convert N to a human readable format in BUF, using the options OPTS.
120
121   N is expressed in units of FROM_BLOCK_SIZE.  FROM_BLOCK_SIZE must
122   be nonnegative.
123
124   Use units of TO_BLOCK_SIZE in the output number.  TO_BLOCK_SIZE
125   must be positive.
126
127   Use (OPTS & (human_round_to_nearest | human_floor | human_ceiling))
128   to determine whether to take the ceiling or floor of any result
129   that cannot be expressed exactly.
130
131   If (OPTS & human_group_digits), group the thousands digits
132   according to the locale, e.g., `1,000,000' in an American English
133   locale.
134
135   If (OPTS & human_autoscale), deduce the output block size
136   automatically; TO_BLOCK_SIZE must be 1 but it has no effect on the
137   output.  Use powers of 1024 if (OPTS & human_base_1024), and powers
138   of 1000 otherwise.  For example, assuming powers of 1024, 8500
139   would be converted to 8.3, 133456345 to 127, 56990456345 to 53, and
140   so on.  Numbers smaller than the power aren't modified.
141   human_autoscale is normally used together with human_SI.
142
143   If (OPTS & human_space_before_unit), use a space to separate the
144   number from any suffix that is appended as described below.
145
146   If (OPTS & human_SI), append an SI prefix indicating which power is
147   being used.  If in addition (OPTS & human_B), append "B" (if base
148   1000) or "iB" (if base 1024) to the SI prefix.  When ((OPTS &
149   human_SI) && ! (OPTS & human_autoscale)), TO_BLOCK_SIZE must be a
150   power of 1024 or of 1000, depending on (OPTS &
151   human_base_1024).  */
152
153char *
154human_readable (uintmax_t n, char *buf, int opts,
155		uintmax_t from_block_size, uintmax_t to_block_size)
156{
157  int inexact_style =
158    opts & (human_round_to_nearest | human_floor | human_ceiling);
159  unsigned int base = opts & human_base_1024 ? 1024 : 1000;
160  uintmax_t amt;
161  int tenths;
162  int exponent = -1;
163  int exponent_max = sizeof power_letter - 1;
164  char *p;
165  char *psuffix;
166  char const *integerlim;
167
168  /* 0 means adjusted N == AMT.TENTHS;
169     1 means AMT.TENTHS < adjusted N < AMT.TENTHS + 0.05;
170     2 means adjusted N == AMT.TENTHS + 0.05;
171     3 means AMT.TENTHS + 0.05 < adjusted N < AMT.TENTHS + 0.1.  */
172  int rounding;
173
174  char const *decimal_point = ".";
175  size_t decimal_pointlen = 1;
176  char const *grouping = "";
177  char const *thousands_sep = "";
178  struct lconv const *l = localeconv ();
179  size_t pointlen = strlen (l->decimal_point);
180  if (0 < pointlen && pointlen <= MB_LEN_MAX)
181    {
182      decimal_point = l->decimal_point;
183      decimal_pointlen = pointlen;
184    }
185  grouping = l->grouping;
186  if (strlen (l->thousands_sep) <= MB_LEN_MAX)
187    thousands_sep = l->thousands_sep;
188
189  psuffix = buf + LONGEST_HUMAN_READABLE - HUMAN_READABLE_SUFFIX_LENGTH_MAX;
190  p = psuffix;
191
192  /* Adjust AMT out of FROM_BLOCK_SIZE units and into TO_BLOCK_SIZE
193     units.  If this can be done exactly with integer arithmetic, do
194     not use floating point operations.  */
195  if (to_block_size <= from_block_size)
196    {
197      if (from_block_size % to_block_size == 0)
198	{
199	  uintmax_t multiplier = from_block_size / to_block_size;
200	  amt = n * multiplier;
201	  if (amt / multiplier == n)
202	    {
203	      tenths = 0;
204	      rounding = 0;
205	      goto use_integer_arithmetic;
206	    }
207	}
208    }
209  else if (from_block_size != 0 && to_block_size % from_block_size == 0)
210    {
211      uintmax_t divisor = to_block_size / from_block_size;
212      uintmax_t r10 = (n % divisor) * 10;
213      uintmax_t r2 = (r10 % divisor) * 2;
214      amt = n / divisor;
215      tenths = r10 / divisor;
216      rounding = r2 < divisor ? 0 < r2 : 2 + (divisor < r2);
217      goto use_integer_arithmetic;
218    }
219
220  {
221    /* Either the result cannot be computed easily using uintmax_t,
222       or from_block_size is zero.  Fall back on floating point.
223       FIXME: This can yield answers that are slightly off.  */
224
225    long double dto_block_size = to_block_size;
226    long double damt = n * (from_block_size / dto_block_size);
227    size_t buflen;
228    size_t nonintegerlen;
229
230    if (! (opts & human_autoscale))
231      {
232	sprintf (buf, "%.0Lf", adjust_value (inexact_style, damt));
233	buflen = strlen (buf);
234	nonintegerlen = 0;
235      }
236    else
237      {
238	long double e = 1;
239	exponent = 0;
240
241	do
242	  {
243	    e *= base;
244	    exponent++;
245	  }
246	while (e * base <= damt && exponent < exponent_max);
247
248	damt /= e;
249
250	sprintf (buf, "%.1Lf", adjust_value (inexact_style, damt));
251	buflen = strlen (buf);
252	nonintegerlen = decimal_pointlen + 1;
253
254	if (1 + nonintegerlen + ! (opts & human_base_1024) < buflen
255	    || ((opts & human_suppress_point_zero)
256		&& buf[buflen - 1] == '0'))
257	  {
258	    sprintf (buf, "%.0Lf",
259		     adjust_value (inexact_style, damt * 10) / 10);
260	    buflen = strlen (buf);
261	    nonintegerlen = 0;
262	  }
263      }
264
265    p = psuffix - buflen;
266    memmove (p, buf, buflen);
267    integerlim = p + buflen - nonintegerlen;
268  }
269  goto do_grouping;
270
271 use_integer_arithmetic:
272  {
273    /* The computation can be done exactly, with integer arithmetic.
274
275       Use power of BASE notation if requested and if adjusted AMT is
276       large enough.  */
277
278    if (opts & human_autoscale)
279      {
280	exponent = 0;
281
282	if (base <= amt)
283	  {
284	    do
285	      {
286		unsigned int r10 = (amt % base) * 10 + tenths;
287		unsigned int r2 = (r10 % base) * 2 + (rounding >> 1);
288		amt /= base;
289		tenths = r10 / base;
290		rounding = (r2 < base
291			    ? (r2 + rounding) != 0
292			    : 2 + (base < r2 + rounding));
293		exponent++;
294	      }
295	    while (base <= amt && exponent < exponent_max);
296
297	    if (amt < 10)
298	      {
299		if (inexact_style == human_round_to_nearest
300		    ? 2 < rounding + (tenths & 1)
301		    : inexact_style == human_ceiling && 0 < rounding)
302		  {
303		    tenths++;
304		    rounding = 0;
305
306		    if (tenths == 10)
307		      {
308			amt++;
309			tenths = 0;
310		      }
311		  }
312
313		if (amt < 10
314		    && (tenths || ! (opts & human_suppress_point_zero)))
315		  {
316		    *--p = '0' + tenths;
317		    p -= decimal_pointlen;
318		    memcpy (p, decimal_point, decimal_pointlen);
319		    tenths = rounding = 0;
320		  }
321	      }
322	  }
323      }
324
325    if (inexact_style == human_round_to_nearest
326	? 5 < tenths + (0 < rounding + (amt & 1))
327	: inexact_style == human_ceiling && 0 < tenths + rounding)
328      {
329	amt++;
330
331	if ((opts & human_autoscale)
332	    && amt == base && exponent < exponent_max)
333	  {
334	    exponent++;
335	    if (! (opts & human_suppress_point_zero))
336	      {
337		*--p = '0';
338		p -= decimal_pointlen;
339		memcpy (p, decimal_point, decimal_pointlen);
340	      }
341	    amt = 1;
342	  }
343      }
344
345    integerlim = p;
346
347    do
348      {
349	int digit = amt % 10;
350	*--p = digit + '0';
351      }
352    while ((amt /= 10) != 0);
353  }
354
355 do_grouping:
356  if (opts & human_group_digits)
357    p = group_number (p, integerlim - p, grouping, thousands_sep);
358
359  if (opts & human_SI)
360    {
361      if (exponent < 0)
362	{
363	  uintmax_t power;
364	  exponent = 0;
365	  for (power = 1; power < to_block_size; power *= base)
366	    if (++exponent == exponent_max)
367	      break;
368	}
369
370      if ((exponent | (opts & human_B)) && (opts & human_space_before_unit))
371	*psuffix++ = ' ';
372
373      if (exponent)
374	*psuffix++ = (! (opts & human_base_1024) && exponent == 1
375		      ? 'k'
376		      : power_letter[exponent]);
377
378      if (opts & human_B)
379	{
380	  if ((opts & human_base_1024) && exponent)
381	    *psuffix++ = 'i';
382	  *psuffix++ = 'B';
383	}
384    }
385
386  *psuffix = '\0';
387
388  return p;
389}
390
391
/* Block size used for output when nothing else selects one.  It can
   be overridden by defining DEFAULT_BLOCK_SIZE beforehand, and the
   built-in value may grow in the future as disks get larger.  */
#if !defined DEFAULT_BLOCK_SIZE
# define DEFAULT_BLOCK_SIZE 1024
#endif
397
398static char const *const block_size_args[] = { "human-readable", "si", 0 };
399static int const block_size_opts[] =
400  {
401    human_autoscale + human_SI + human_base_1024,
402    human_autoscale + human_SI
403  };
404
405static uintmax_t
406default_block_size (void)
407{
408  return getenv ("POSIXLY_CORRECT") ? 512 : DEFAULT_BLOCK_SIZE;
409}
410
411static strtol_error
412humblock (char const *spec, uintmax_t *block_size, int *options)
413{
414  int i;
415  int opts = 0;
416
417  if (! spec
418      && ! (spec = getenv ("BLOCK_SIZE"))
419      && ! (spec = getenv ("BLOCKSIZE")))
420    *block_size = default_block_size ();
421  else
422    {
423      if (*spec == '\'')
424	{
425	  opts |= human_group_digits;
426	  spec++;
427	}
428
429      if (0 <= (i = ARGMATCH (spec, block_size_args, block_size_opts)))
430	{
431	  opts |= block_size_opts[i];
432	  *block_size = 1;
433	}
434      else
435	{
436	  char *ptr;
437	  strtol_error e = xstrtoumax (spec, &ptr, 0, block_size,
438				       "eEgGkKmMpPtTyYzZ0");
439	  if (e != LONGINT_OK)
440	    {
441	      *options = 0;
442	      return e;
443	    }
444	  for (; ! ('0' <= *spec && *spec <= '9'); spec++)
445	    if (spec == ptr)
446	      {
447		opts |= human_SI;
448		if (ptr[-1] == 'B')
449		  opts |= human_B;
450		if (ptr[-1] != 'B' || ptr[-2] == 'i')
451		  opts |= human_base_1024;
452		break;
453	      }
454	}
455    }
456
457  *options = opts;
458  return LONGINT_OK;
459}
460
461enum strtol_error
462human_options (char const *spec, int *opts, uintmax_t *block_size)
463{
464  strtol_error e = humblock (spec, block_size, opts);
465  if (*block_size == 0)
466    {
467      *block_size = default_block_size ();
468      e = LONGINT_INVALID;
469    }
470  return e;
471}
472