1/* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
2
3   Contributed to the GNU project by Torbjorn Granlund.
4
5   THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH A MUTABLE
6   INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN
7   FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
8   GNU MP RELEASE.
9
10Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
112008 Free Software Foundation, Inc.
12
13This file is part of the GNU MP Library.
14
15The GNU MP Library is free software; you can redistribute it and/or modify
16it under the terms of the GNU Lesser General Public License as published by
17the Free Software Foundation; either version 3 of the License, or (at your
18option) any later version.
19
20The GNU MP Library is distributed in the hope that it will be useful, but
21WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
23License for more details.
24
25You should have received a copy of the GNU Lesser General Public License
26along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
27
28#include "gmp.h"
29#include "gmp-impl.h"
30#include "longlong.h"
31
32/* Conversion of U {up,un} to a string in base b.  Internally, we convert to
33   base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
34
35  A) Divide U repeatedly by B, generating a quotient and remainder, until the
36     quotient becomes zero.  The remainders hold the converted digits.  Digits
37     come out from right to left.  (Used in mpn_sb_get_str.)
38
39  B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
40     Then develop digits by multiplying the fraction repeatedly by b.  Digits
41     come out from left to right.  (Currently not used herein, except for in
42     code for converting single limbs to individual digits.)
43
44  C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
45     sqrt(U).  Then divide U by B^s, generating quotient and remainder.
46     Recursively convert the quotient, then the remainder, using the
47     precomputed powers.  Digits come out from left to right.  (Used in
48     mpn_dc_get_str.)
49
50  When using algorithm C, algorithm B might be suitable for basecase code,
51  since the required b^g power will be readily accessible.
52
53  Optimization ideas:
54  1. The recursive function of (C) could use less temporary memory.  The powtab
55     allocation could be trimmed with some computation, and the tmp area could
56     be reduced, or perhaps eliminated if up is reused for both quotient and
57     remainder (it is currently used just for remainder).
58  2. Store the powers of (C) in normalized form, with the normalization count.
59     Quotients will usually need to be left-shifted before each divide, and
60     remainders will either need to be left-shifted of right-shifted.
61  3. In the code for developing digits from a single limb, we could avoid using
62     a full umul_ppmm except for the first (or first few) digits, provided base
63     is even.  Subsequent digits can be developed using plain multiplication.
64     (This saves on register-starved machines (read x86) and on all machines
65     that generate the upper product half using a separate instruction (alpha,
66     powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
67  4. Separate mpn_dc_get_str basecase code from code for small conversions. The
68     former code will have the exact right power readily available in the
69     powtab parameter for dividing the current number into a fraction.  Convert
70     that using algorithm B.
71  5. Completely avoid division.  Compute the inverses of the powers now in
72     powtab instead of the actual powers.
73  6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
74     about 30% (1-log(5)/log(10)).
75
76  Basic structure of (C):
77    mpn_get_str:
78      if POW2_P (n)
79	...
80      else
81	if (un < GET_STR_PRECOMPUTE_THRESHOLD)
82	  mpn_sb_get_str (str, base, up, un);
83	else
84	  precompute_power_tables
85	  mpn_dc_get_str
86
87    mpn_dc_get_str:
88	mpn_tdiv_qr
89	if (qn < GET_STR_DC_THRESHOLD)
90	  mpn_sb_get_str
91	else
92	  mpn_dc_get_str
93	if (rn < GET_STR_DC_THRESHOLD)
94	  mpn_sb_get_str
95	else
96	  mpn_dc_get_str
97
98
99  The reason for the two threshold values is the cost of
100  precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably
101  larger than GET_STR_PRECOMPUTE_THRESHOLD.  */
102
103
104/* The x86s and m68020 have a quotient and remainder "div" instruction and
105   gcc recognises an adjacent "/" and "%" can be combined using that.
106   Elsewhere "/" and "%" are either separate instructions, or separate
107   libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
108   A multiply and subtract should be faster than a "%" in those cases.  */
109#if HAVE_HOST_CPU_FAMILY_x86            \
110  || HAVE_HOST_CPU_m68020               \
111  || HAVE_HOST_CPU_m68030               \
112  || HAVE_HOST_CPU_m68040               \
113  || HAVE_HOST_CPU_m68060               \
114  || HAVE_HOST_CPU_m68360 /* CPU32 */
115#define udiv_qrnd_unnorm(q,r,n,d)       \
116  do {                                  \
117    mp_limb_t  __q = (n) / (d);         \
118    mp_limb_t  __r = (n) % (d);         \
119    (q) = __q;                          \
120    (r) = __r;                          \
121  } while (0)
122#else
123#define udiv_qrnd_unnorm(q,r,n,d)       \
124  do {                                  \
125    mp_limb_t  __q = (n) / (d);         \
126    mp_limb_t  __r = (n) - __q*(d);     \
127    (q) = __q;                          \
128    (r) = __r;                          \
129  } while (0)
130#endif
131
132
133/* Convert {up,un} to a string in base base, and put the result in str.
134   Generate len characters, possibly padding with zeros to the left.  If len is
135   zero, generate as many characters as required.  Return a pointer immediately
136   after the last digit of the result string.  Complexity is O(un^2); intended
137   for small conversions.  */
138static unsigned char *
139mpn_sb_get_str (unsigned char *str, size_t len,
140		mp_ptr up, mp_size_t un, int base)
141{
142  mp_limb_t rl, ul;
143  unsigned char *s;
144  size_t l;
145  /* Allocate memory for largest possible string, given that we only get here
146     for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
147     base is 3.  7/11 is an approximation to 1/log2(3).  */
148#if TUNE_PROGRAM_BUILD
149#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
150#else
151#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
152#endif
153  unsigned char buf[BUF_ALLOC];
154#if TUNE_PROGRAM_BUILD
155  mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
156#else
157  mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
158#endif
159
160  if (base == 10)
161    {
162      /* Special case code for base==10 so that the compiler has a chance to
163	 optimize things.  */
164
165      MPN_COPY (rp + 1, up, un);
166
167      s = buf + BUF_ALLOC;
168      while (un > 1)
169	{
170	  int i;
171	  mp_limb_t frac, digit;
172	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
173					 MP_BASES_BIG_BASE_10,
174					 MP_BASES_BIG_BASE_INVERTED_10,
175					 MP_BASES_NORMALIZATION_STEPS_10);
176	  un -= rp[un] == 0;
177	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
178	  s -= MP_BASES_CHARS_PER_LIMB_10;
179#if HAVE_HOST_CPU_FAMILY_x86
180	  /* The code below turns out to be a bit slower for x86 using gcc.
181	     Use plain code.  */
182	  i = MP_BASES_CHARS_PER_LIMB_10;
183	  do
184	    {
185	      umul_ppmm (digit, frac, frac, 10);
186	      *s++ = digit;
187	    }
188	  while (--i);
189#else
190	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
191	     After a few umul_ppmm, we will have accumulated enough low zeros
192	     to use a plain multiply.  */
193	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
194	    {
195	      umul_ppmm (digit, frac, frac, 10);
196	      *s++ = digit;
197	    }
198	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
199	    {
200	      umul_ppmm (digit, frac, frac, 10);
201	      *s++ = digit;
202	    }
203	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
204	    {
205	      umul_ppmm (digit, frac, frac, 10);
206	      *s++ = digit;
207	    }
208	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
209	    {
210	      umul_ppmm (digit, frac, frac, 10);
211	      *s++ = digit;
212	    }
213	  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
214					     ? (4-MP_BASES_NORMALIZATION_STEPS_10)
215					     : 0));
216	  frac = (frac + 0xf) >> 4;
217	  do
218	    {
219	      frac *= 10;
220	      digit = frac >> (GMP_LIMB_BITS - 4);
221	      *s++ = digit;
222	      frac &= (~(mp_limb_t) 0) >> 4;
223	    }
224	  while (--i);
225#endif
226	  s -= MP_BASES_CHARS_PER_LIMB_10;
227	}
228
229      ul = rp[1];
230      while (ul != 0)
231	{
232	  udiv_qrnd_unnorm (ul, rl, ul, 10);
233	  *--s = rl;
234	}
235    }
236  else /* not base 10 */
237    {
238      unsigned chars_per_limb;
239      mp_limb_t big_base, big_base_inverted;
240      unsigned normalization_steps;
241
242      chars_per_limb = mp_bases[base].chars_per_limb;
243      big_base = mp_bases[base].big_base;
244      big_base_inverted = mp_bases[base].big_base_inverted;
245      count_leading_zeros (normalization_steps, big_base);
246
247      MPN_COPY (rp + 1, up, un);
248
249      s = buf + BUF_ALLOC;
250      while (un > 1)
251	{
252	  int i;
253	  mp_limb_t frac;
254	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
255					 big_base, big_base_inverted,
256					 normalization_steps);
257	  un -= rp[un] == 0;
258	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
259	  s -= chars_per_limb;
260	  i = chars_per_limb;
261	  do
262	    {
263	      mp_limb_t digit;
264	      umul_ppmm (digit, frac, frac, base);
265	      *s++ = digit;
266	    }
267	  while (--i);
268	  s -= chars_per_limb;
269	}
270
271      ul = rp[1];
272      while (ul != 0)
273	{
274	  udiv_qrnd_unnorm (ul, rl, ul, base);
275	  *--s = rl;
276	}
277    }
278
279  l = buf + BUF_ALLOC - s;
280  while (l < len)
281    {
282      *str++ = 0;
283      len--;
284    }
285  while (l != 0)
286    {
287      *str++ = *s++;
288      l--;
289    }
290  return str;
291}
292
293
294/* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
295   the string in STR.  Generate LEN characters, possibly padding with zeros to
296   the left.  If LEN is zero, generate as many characters as required.
297   Return a pointer immediately after the last digit of the result string.
298   This uses divide-and-conquer and is intended for large conversions.  */
299static unsigned char *
300mpn_dc_get_str (unsigned char *str, size_t len,
301		mp_ptr up, mp_size_t un,
302		const powers_t *powtab, mp_ptr tmp)
303{
304  if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
305    {
306      if (un != 0)
307	str = mpn_sb_get_str (str, len, up, un, powtab->base);
308      else
309	{
310	  while (len != 0)
311	    {
312	      *str++ = 0;
313	      len--;
314	    }
315	}
316    }
317  else
318    {
319      mp_ptr pwp, qp, rp;
320      mp_size_t pwn, qn;
321      mp_size_t sn;
322
323      pwp = powtab->p;
324      pwn = powtab->n;
325      sn = powtab->shift;
326
327      if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
328	{
329	  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
330	}
331      else
332	{
333	  qp = tmp;		/* (un - pwn + 1) limbs for qp */
334	  rp = up;		/* pwn limbs for rp; overwrite up area */
335
336	  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
337	  qn = un - sn - pwn; qn += qp[qn] != 0;		/* quotient size */
338
339	  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
340
341	  if (len != 0)
342	    len = len - powtab->digits_in_base;
343
344	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
345	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
346	}
347    }
348  return str;
349}
350
351
352/* There are no leading zeros on the digits generated at str, but that's not
353   currently a documented feature.  */
354
355size_t
356mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
357{
358  mp_ptr powtab_mem, powtab_mem_ptr;
359  mp_limb_t big_base;
360  size_t digits_in_base;
361  powers_t powtab[GMP_LIMB_BITS];
362  int pi;
363  mp_size_t n;
364  mp_ptr p, t;
365  size_t out_len;
366  mp_ptr tmp;
367  TMP_DECL;
368
369  /* Special case zero, as the code below doesn't handle it.  */
370  if (un == 0)
371    {
372      str[0] = 0;
373      return 1;
374    }
375
376  if (POW2_P (base))
377    {
378      /* The base is a power of 2.  Convert from most significant end.  */
379      mp_limb_t n1, n0;
380      int bits_per_digit = mp_bases[base].big_base;
381      int cnt;
382      int bit_pos;
383      mp_size_t i;
384      unsigned char *s = str;
385      mp_bitcnt_t bits;
386
387      n1 = up[un - 1];
388      count_leading_zeros (cnt, n1);
389
390      /* BIT_POS should be R when input ends in least significant nibble,
391	 R + bits_per_digit * n when input ends in nth least significant
392	 nibble. */
393
394      bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
395      cnt = bits % bits_per_digit;
396      if (cnt != 0)
397	bits += bits_per_digit - cnt;
398      bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
399
400      /* Fast loop for bit output.  */
401      i = un - 1;
402      for (;;)
403	{
404	  bit_pos -= bits_per_digit;
405	  while (bit_pos >= 0)
406	    {
407	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
408	      bit_pos -= bits_per_digit;
409	    }
410	  i--;
411	  if (i < 0)
412	    break;
413	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
414	  n1 = up[i];
415	  bit_pos += GMP_NUMB_BITS;
416	  *s++ = n0 | (n1 >> bit_pos);
417	}
418
419      return s - str;
420    }
421
422  /* General case.  The base is not a power of 2.  */
423
424  if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
425    return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str;
426
427  TMP_MARK;
428
429  /* Allocate one large block for the powers of big_base.  */
430  powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_get_str_powtab_alloc (un));
431  powtab_mem_ptr = powtab_mem;
432
433  /* Compute a table of powers, were the largest power is >= sqrt(U).  */
434
435  big_base = mp_bases[base].big_base;
436  digits_in_base = mp_bases[base].chars_per_limb;
437
438  {
439    mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
440    mp_limb_t cy;
441    mp_size_t shift;
442
443    n_pows = 0;
444    xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;
445    for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
446      {
447	exptab[n_pows] = pn;
448	n_pows++;
449      }
450    exptab[n_pows] = 1;
451
452    powtab[0].p = &big_base;
453    powtab[0].n = 1;
454    powtab[0].digits_in_base = digits_in_base;
455    powtab[0].base = base;
456    powtab[0].shift = 0;
457
458    powtab[1].p = powtab_mem_ptr;  powtab_mem_ptr += 2;
459    powtab[1].p[0] = big_base;
460    powtab[1].n = 1;
461    powtab[1].digits_in_base = digits_in_base;
462    powtab[1].base = base;
463    powtab[1].shift = 0;
464
465    n = 1;
466    p = &big_base;
467    bexp = 1;
468    shift = 0;
469    for (pi = 2; pi < n_pows; pi++)
470      {
471	t = powtab_mem_ptr;
472	powtab_mem_ptr += 2 * n + 2;
473
474	ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));
475
476	mpn_sqr (t, p, n);
477
478	digits_in_base *= 2;
479	n *= 2;  n -= t[n - 1] == 0;
480	bexp *= 2;
481
482	if (bexp + 1 < exptab[n_pows - pi])
483	  {
484	    digits_in_base += mp_bases[base].chars_per_limb;
485	    cy = mpn_mul_1 (t, t, n, big_base);
486	    t[n] = cy;
487	    n += cy != 0;
488	    bexp += 1;
489	  }
490	shift *= 2;
491	/* Strip low zero limbs.  */
492	while (t[0] == 0)
493	  {
494	    t++;
495	    n--;
496	    shift++;
497	  }
498	p = t;
499	powtab[pi].p = p;
500	powtab[pi].n = n;
501	powtab[pi].digits_in_base = digits_in_base;
502	powtab[pi].base = base;
503	powtab[pi].shift = shift;
504      }
505
506    for (pi = 1; pi < n_pows; pi++)
507      {
508	t = powtab[pi].p;
509	n = powtab[pi].n;
510	cy = mpn_mul_1 (t, t, n, big_base);
511	t[n] = cy;
512	n += cy != 0;
513	if (t[0] == 0)
514	  {
515	    powtab[pi].p = t + 1;
516	    n--;
517	    powtab[pi].shift++;
518	  }
519	powtab[pi].n = n;
520	powtab[pi].digits_in_base += mp_bases[base].chars_per_limb;
521      }
522
523#if 0
524    { int i;
525      printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn);
526      for (i = 0; i < n_pows; i++)
527	printf ("%2d: %10ld %10ld %11ld %ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base, powtab[i].shift);
528    }
529#endif
530  }
531
532  /* Using our precomputed powers, now in powtab[], convert our number.  */
533  tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
534  out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str;
535  TMP_FREE;
536
537  return out_len;
538}
539