1/**********************************************************************
2
3  sprintf.c -
4
5  $Author: nagachika $
6  created at: Fri Oct 15 10:39:26 JST 1993
7
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9  Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
10  Copyright (C) 2000  Information-technology Promotion Agency, Japan
11
12**********************************************************************/
13
14#include "ruby/ruby.h"
15#include "ruby/re.h"
16#include "ruby/encoding.h"
17#include <math.h>
18#include <stdarg.h>
19
20#ifdef HAVE_IEEEFP_H
21#include <ieeefp.h>
22#endif
23
/*
 * Estimate of the number of decimal digits needed for an N-bit magnitude:
 * N * log10(2) + 1, with log10(2) approximated by the ratio 146/485.
 */
#define BIT_DIGITS(N)   (((N)*146)/485 + 1)  /* log10(2) =~ 146/485 */
/* Number of bits in SIZEOF_BDIGITS bytes. */
#define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT)
/*
 * Mask of high bits to OR into the leading digit of an n-bits-per-digit
 * string of length l (used with n == 3 for octal by remove_sign_bits).
 * Written as -(1 << n) and (1 << n) - 1 rather than (~0 << n) and
 * ~(~0 << n): the values are the same, but left-shifting a negative int
 * is undefined behavior.
 */
#define EXTENDSIGN(n, l) (((-(1 << (n))) >> (((n)*(l)) % BITSPERDIG)) & ((1 << (n)) - 1))

static void fmt_setup(char*,size_t,int,int,int,int);
29
/*
 * Returns a pointer past the leading run of sign digits ('f' for base 16,
 * '7' for base 8, '1' for base 2) in str.  For base 8 the first digit is
 * first OR-ed with EXTENDSIGN(3, strlen(str)), modifying the string in
 * place.  For any other base str is returned unchanged.
 */
static char*
remove_sign_bits(char *str, int base)
{
    char sign_digit;

    switch (base) {
      case 16:
	sign_digit = 'f';
	break;
      case 8:
	*str |= EXTENDSIGN(3, strlen(str));
	sign_digit = '7';
	break;
      case 2:
	sign_digit = '1';
	break;
      default:
	return str;
    }

    while (*str == sign_digit) {
	str++;
    }
    return str;
}
54
/*
 * Returns the digit used to pad a negative number shown as a complement:
 * 'f' (or 'F' when the conversion character at *p is 'X') for base 16,
 * '7' for base 8, '1' for base 2, and '.' for any other base.
 */
static char
sign_bits(int base, const char *p)
{
    if (base == 16) {
	return (*p == 'X') ? 'F' : 'f';
    }
    if (base == 8) {
	return '7';
    }
    if (base == 2) {
	return '1';
    }
    return '.';
}
72
/* Flag bits accumulated while parsing a single format directive. */
#define FNONE  0
#define FSHARP 1	/* `#' */
#define FMINUS 2	/* `-', or a negative `*' width */
#define FPLUS  4	/* `+' */
#define FZERO  8	/* `0' */
#define FSPACE 16	/* ` ' */
#define FWIDTH 32	/* a width was given */
#define FPREC  64	/* a precision is in effect */
#define FPREC0 128	/* a `.' was seen (stays set even if FPREC is cleared) */

/*
 * Grow the result buffer until more than `l' bytes are free after blen,
 * then refresh buf.  The code range of result is saved and restored
 * around the resize.  Raises ArgumentError instead of overflowing bsiz
 * when the buffer cannot be doubled any further.
 */
#define CHECK(l) do {\
    int cr = ENC_CODERANGE(result);\
    while ((l) >= bsiz - blen) {\
	if (bsiz > LONG_MAX / 2) {\
	    rb_raise(rb_eArgError, "too big specifier");\
	}\
	bsiz*=2;\
    }\
    rb_str_resize(result, bsiz);\
    ENC_CODERANGE_SET(result, cr);\
    buf = RSTRING_PTR(result);\
} while (0)

/* Append l bytes starting at s to the result buffer. */
#define PUSH(s, l) do { \
    CHECK(l);\
    memcpy(&buf[blen], (s), (l));\
    blen += (l);\
} while (0)

/* Append l copies of byte c to the result buffer; l must not be negative. */
#define FILL(c, l) do { \
    CHECK(l);\
    memset(&buf[blen], (c), (l));\
    blen += (l);\
} while (0)

/*
 * Argument selection.  posarg tracks the style in use: > 0 after an
 * unnumbered (sequential) argument, -1 after a numbered one (`n$'),
 * -2 after a named one; mixing styles raises ArgumentError.
 */

/* Value already chosen by `n$' / `<name>', else the next sequential one. */
#define GETARG() (nextvalue != Qundef ? nextvalue : \
    posarg == -1 ? \
    (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \
    posarg == -2 ? \
    (rb_raise(rb_eArgError, "unnumbered(%d) mixed with named", nextarg), 0) : \
    (posarg = nextarg++, GETNTHARG(posarg)))

/* Argument selected by an absolute index n (1-based). */
#define GETPOSARG(n) (posarg > 0 ? \
    (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", (n), posarg), 0) : \
    posarg == -2 ? \
    (rb_raise(rb_eArgError, "numbered(%d) after named", (n)), 0) : \
    (((n) < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", (n)), 0) : \
	       (posarg = -1, GETNTHARG(n))))

/* argv[nth], raising ArgumentError when nth is out of range. */
#define GETNTHARG(nth) \
    (((nth) >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[(nth)])

/* Hash entry for key `id'; yields Qundef when the key is absent. */
#define GETNAMEARG(id, name, len, enc) ( \
    posarg > 0 ? \
    (rb_enc_raise((enc), rb_eArgError, "named%.*s after unnumbered(%d)", (len), (name), posarg), 0) : \
    posarg == -1 ? \
    (rb_enc_raise((enc), rb_eArgError, "named%.*s after numbered", (len), (name)), 0) :	\
    (posarg = -2, rb_hash_lookup2(get_hash(&hash, argc, argv), (id), Qundef)))

/*
 * Accumulate the decimal digits at p into n, advancing p.  The range is
 * checked before multiplying so that the int arithmetic never overflows.
 * Raises ArgumentError if the value does not fit in an int or if the
 * format string ends right after the digits.
 */
#define GETNUM(n, val) \
    for (; p < end && rb_enc_isdigit(*p, enc); p++) {	\
	int next_digit = *p - '0'; \
	if ((n) > (INT_MAX - next_digit) / 10) {\
	    rb_raise(rb_eArgError, #val " too big"); \
	} \
	(n) = 10 * (n) + next_digit; \
    } \
    if (p >= end) { \
	rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \
    }

/*
 * Handle `*' or `*n$': fetch the width/precision from the argument list
 * and store it in val as an int.  On exit p is left on the `*' (plain
 * form) or on the `$' (numbered form).
 */
#define GETASTER(val) do { \
    t = p++; \
    n = 0; \
    GETNUM(n, (val)); \
    if (*p == '$') { \
	tmp = GETPOSARG(n); \
    } \
    else { \
	tmp = GETARG(); \
	p = t; \
    } \
    (val) = NUM2INT(tmp); \
} while (0)
154
155static VALUE
156get_hash(volatile VALUE *hash, int argc, const VALUE *argv)
157{
158    VALUE tmp;
159
160    if (*hash != Qundef) return *hash;
161    if (argc != 2) {
162	rb_raise(rb_eArgError, "one hash required");
163    }
164    tmp = rb_check_hash_type(argv[1]);
165    if (NIL_P(tmp)) {
166	rb_raise(rb_eArgError, "one hash required");
167    }
168    return (*hash = tmp);
169}
170
171/*
172 *  call-seq:
173 *     format(format_string [, arguments...] )   -> string
174 *     sprintf(format_string [, arguments...] )  -> string
175 *
176 *  Returns the string resulting from applying <i>format_string</i> to
177 *  any additional arguments.  Within the format string, any characters
178 *  other than format sequences are copied to the result.
179 *
 *  The syntax of a format sequence is as follows.
181 *
182 *    %[flags][width][.precision]type
183 *
184 *  A format
185 *  sequence consists of a percent sign, followed by optional flags,
186 *  width, and precision indicators, then terminated with a field type
187 *  character.  The field type controls how the corresponding
188 *  <code>sprintf</code> argument is to be interpreted, while the flags
189 *  modify that interpretation.
190 *
191 *  The field type characters are:
192 *
193 *      Field |  Integer Format
194 *      ------+--------------------------------------------------------------
195 *        b   | Convert argument as a binary number.
196 *            | Negative numbers will be displayed as a two's complement
197 *            | prefixed with `..1'.
198 *        B   | Equivalent to `b', but uses an uppercase 0B for prefix
199 *            | in the alternative format by #.
200 *        d   | Convert argument as a decimal number.
201 *        i   | Identical to `d'.
202 *        o   | Convert argument as an octal number.
203 *            | Negative numbers will be displayed as a two's complement
204 *            | prefixed with `..7'.
205 *        u   | Identical to `d'.
206 *        x   | Convert argument as a hexadecimal number.
207 *            | Negative numbers will be displayed as a two's complement
208 *            | prefixed with `..f' (representing an infinite string of
209 *            | leading 'ff's).
210 *        X   | Equivalent to `x', but uses uppercase letters.
211 *
212 *      Field |  Float Format
213 *      ------+--------------------------------------------------------------
214 *        e   | Convert floating point argument into exponential notation
215 *            | with one digit before the decimal point as [-]d.dddddde[+-]dd.
216 *            | The precision specifies the number of digits after the decimal
217 *            | point (defaulting to six).
218 *        E   | Equivalent to `e', but uses an uppercase E to indicate
219 *            | the exponent.
220 *        f   | Convert floating point argument as [-]ddd.dddddd,
221 *            | where the precision specifies the number of digits after
222 *            | the decimal point.
223 *        g   | Convert a floating point number using exponential form
224 *            | if the exponent is less than -4 or greater than or
225 *            | equal to the precision, or in dd.dddd form otherwise.
226 *            | The precision specifies the number of significant digits.
227 *        G   | Equivalent to `g', but use an uppercase `E' in exponent form.
228 *        a   | Convert floating point argument as [-]0xh.hhhhp[+-]dd,
 *            | which consists of an optional sign, "0x", fraction part
230 *            | as hexadecimal, "p", and exponential part as decimal.
231 *        A   | Equivalent to `a', but use uppercase `X' and `P'.
232 *
233 *      Field |  Other Format
234 *      ------+--------------------------------------------------------------
235 *        c   | Argument is the numeric code for a single character or
236 *            | a single character string itself.
 *        p   | The value of argument.inspect.
238 *        s   | Argument is a string to be substituted.  If the format
239 *            | sequence contains a precision, at most that many characters
240 *            | will be copied.
241 *        %   | A percent sign itself will be displayed.  No argument taken.
242 *
 *  The flags modify the behavior of the formats.
244 *  The flag characters are:
245 *
246 *    Flag     | Applies to    | Meaning
247 *    ---------+---------------+-----------------------------------------
248 *    space    | bBdiouxX      | Leave a space at the start of
249 *             | aAeEfgG       | non-negative numbers.
250 *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
251 *             |               | a minus sign with absolute value for
252 *             |               | negative values.
253 *    ---------+---------------+-----------------------------------------
254 *    (digit)$ | all           | Specifies the absolute argument number
255 *             |               | for this field.  Absolute and relative
256 *             |               | argument numbers cannot be mixed in a
257 *             |               | sprintf string.
258 *    ---------+---------------+-----------------------------------------
259 *     #       | bBoxX         | Use an alternative format.
260 *             | aAeEfgG       | For the conversions `o', increase the precision
261 *             |               | until the first digit will be `0' if
262 *             |               | it is not formatted as complements.
263 *             |               | For the conversions `x', `X', `b' and `B'
264 *             |               | on non-zero, prefix the result with ``0x'',
265 *             |               | ``0X'', ``0b'' and ``0B'', respectively.
266 *             |               | For `a', `A', `e', `E', `f', `g', and 'G',
267 *             |               | force a decimal point to be added,
268 *             |               | even if no digits follow.
269 *             |               | For `g' and 'G', do not remove trailing zeros.
270 *    ---------+---------------+-----------------------------------------
271 *    +        | bBdiouxX      | Add a leading plus sign to non-negative
272 *             | aAeEfgG       | numbers.
273 *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
274 *             |               | a minus sign with absolute value for
275 *             |               | negative values.
276 *    ---------+---------------+-----------------------------------------
277 *    -        | all           | Left-justify the result of this conversion.
278 *    ---------+---------------+-----------------------------------------
279 *    0 (zero) | bBdiouxX      | Pad with zeros, not spaces.
280 *             | aAeEfgG       | For `o', `x', `X', `b' and `B', radix-1
281 *             | (numeric fmt) | is used for negative numbers formatted as
282 *             |               | complements.
283 *    ---------+---------------+-----------------------------------------
284 *    *        | all           | Use the next argument as the field width.
285 *             |               | If negative, left-justify the result. If the
286 *             |               | asterisk is followed by a number and a dollar
287 *             |               | sign, use the indicated argument as the width.
288 *
289 *  Examples of flags:
290 *
291 *   # `+' and space flag specifies the sign of non-negative numbers.
292 *   sprintf("%d", 123)  #=> "123"
293 *   sprintf("%+d", 123) #=> "+123"
294 *   sprintf("% d", 123) #=> " 123"
295 *
296 *   # `#' flag for `o' increases number of digits to show `0'.
297 *   # `+' and space flag changes format of negative numbers.
298 *   sprintf("%o", 123)   #=> "173"
299 *   sprintf("%#o", 123)  #=> "0173"
300 *   sprintf("%+o", -123) #=> "-173"
301 *   sprintf("%o", -123)  #=> "..7605"
302 *   sprintf("%#o", -123) #=> "..7605"
303 *
304 *   # `#' flag for `x' add a prefix `0x' for non-zero numbers.
305 *   # `+' and space flag disables complements for negative numbers.
306 *   sprintf("%x", 123)   #=> "7b"
307 *   sprintf("%#x", 123)  #=> "0x7b"
308 *   sprintf("%+x", -123) #=> "-7b"
309 *   sprintf("%x", -123)  #=> "..f85"
310 *   sprintf("%#x", -123) #=> "0x..f85"
311 *   sprintf("%#x", 0)    #=> "0"
312 *
313 *   # `#' for `X' uses the prefix `0X'.
314 *   sprintf("%X", 123)  #=> "7B"
315 *   sprintf("%#X", 123) #=> "0X7B"
316 *
317 *   # `#' flag for `b' add a prefix `0b' for non-zero numbers.
318 *   # `+' and space flag disables complements for negative numbers.
319 *   sprintf("%b", 123)   #=> "1111011"
320 *   sprintf("%#b", 123)  #=> "0b1111011"
321 *   sprintf("%+b", -123) #=> "-1111011"
322 *   sprintf("%b", -123)  #=> "..10000101"
323 *   sprintf("%#b", -123) #=> "0b..10000101"
324 *   sprintf("%#b", 0)    #=> "0"
325 *
326 *   # `#' for `B' uses the prefix `0B'.
327 *   sprintf("%B", 123)  #=> "1111011"
328 *   sprintf("%#B", 123) #=> "0B1111011"
329 *
330 *   # `#' for `e' forces to show the decimal point.
331 *   sprintf("%.0e", 1)  #=> "1e+00"
332 *   sprintf("%#.0e", 1) #=> "1.e+00"
333 *
334 *   # `#' for `f' forces to show the decimal point.
335 *   sprintf("%.0f", 1234)  #=> "1234"
336 *   sprintf("%#.0f", 1234) #=> "1234."
337 *
338 *   # `#' for `g' forces to show the decimal point.
339 *   # It also disables stripping lowest zeros.
340 *   sprintf("%g", 123.4)   #=> "123.4"
341 *   sprintf("%#g", 123.4)  #=> "123.400"
342 *   sprintf("%g", 123456)  #=> "123456"
343 *   sprintf("%#g", 123456) #=> "123456."
344 *
345 *  The field width is an optional integer, followed optionally by a
346 *  period and a precision.  The width specifies the minimum number of
347 *  characters that will be written to the result for this field.
348 *
349 *  Examples of width:
350 *
351 *   # padding is done by spaces,       width=20
352 *   # 0 or radix-1.             <------------------>
353 *   sprintf("%20d", 123)   #=> "                 123"
354 *   sprintf("%+20d", 123)  #=> "                +123"
355 *   sprintf("%020d", 123)  #=> "00000000000000000123"
356 *   sprintf("%+020d", 123) #=> "+0000000000000000123"
357 *   sprintf("% 020d", 123) #=> " 0000000000000000123"
358 *   sprintf("%-20d", 123)  #=> "123                 "
359 *   sprintf("%-+20d", 123) #=> "+123                "
360 *   sprintf("%- 20d", 123) #=> " 123                "
361 *   sprintf("%020x", -123) #=> "..ffffffffffffffff85"
362 *
363 *  For
364 *  numeric fields, the precision controls the number of decimal places
365 *  displayed.  For string fields, the precision determines the maximum
366 *  number of characters to be copied from the string.  (Thus, the format
367 *  sequence <code>%10.10s</code> will always contribute exactly ten
368 *  characters to the result.)
369 *
370 *  Examples of precisions:
371 *
372 *   # precision for `d', 'o', 'x' and 'b' is
373 *   # minimum number of digits               <------>
374 *   sprintf("%20.8d", 123)  #=> "            00000123"
375 *   sprintf("%20.8o", 123)  #=> "            00000173"
376 *   sprintf("%20.8x", 123)  #=> "            0000007b"
377 *   sprintf("%20.8b", 123)  #=> "            01111011"
378 *   sprintf("%20.8d", -123) #=> "           -00000123"
379 *   sprintf("%20.8o", -123) #=> "            ..777605"
380 *   sprintf("%20.8x", -123) #=> "            ..ffff85"
381 *   sprintf("%20.8b", -11)  #=> "            ..110101"
382 *
383 *   # "0x" and "0b" for `#x' and `#b' is not counted for
384 *   # precision but "0" for `#o' is counted.  <------>
385 *   sprintf("%#20.8d", 123)  #=> "            00000123"
386 *   sprintf("%#20.8o", 123)  #=> "            00000173"
387 *   sprintf("%#20.8x", 123)  #=> "          0x0000007b"
388 *   sprintf("%#20.8b", 123)  #=> "          0b01111011"
389 *   sprintf("%#20.8d", -123) #=> "           -00000123"
390 *   sprintf("%#20.8o", -123) #=> "            ..777605"
391 *   sprintf("%#20.8x", -123) #=> "          0x..ffff85"
392 *   sprintf("%#20.8b", -11)  #=> "          0b..110101"
393 *
394 *   # precision for `e' is number of
395 *   # digits after the decimal point           <------>
396 *   sprintf("%20.8e", 1234.56789) #=> "      1.23456789e+03"
397 *
398 *   # precision for `f' is number of
399 *   # digits after the decimal point               <------>
400 *   sprintf("%20.8f", 1234.56789) #=> "       1234.56789000"
401 *
402 *   # precision for `g' is number of
403 *   # significant digits                          <------->
404 *   sprintf("%20.8g", 1234.56789) #=> "           1234.5679"
405 *
406 *   #                                         <------->
407 *   sprintf("%20.8g", 123456789)  #=> "       1.2345679e+08"
408 *
409 *   # precision for `s' is
410 *   # maximum number of characters                    <------>
411 *   sprintf("%20.8s", "string test") #=> "            string t"
412 *
413 *  Examples:
414 *
415 *     sprintf("%d %04x", 123, 123)               #=> "123 007b"
416 *     sprintf("%08b '%4s'", 123, 123)            #=> "01111011 ' 123'"
417 *     sprintf("%1$*2$s %2$d %1$s", "hello", 8)   #=> "   hello 8 hello"
418 *     sprintf("%1$*2$s %2$d", "hello", -8)       #=> "hello    -8"
419 *     sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23)   #=> "+1.23: 1.23:1.23"
420 *     sprintf("%u", -123)                        #=> "-123"
421 *
422 *  For more complex formatting, Ruby supports a reference by name.
423 *  %<name>s style uses format style, but %{name} style doesn't.
424 *
425 *  Examples:
426 *    sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 })
427 *      #=> 1 : 2.000000
428 *    sprintf("%{foo}f", { :foo => 1 })
429 *      # => "1f"
430 */
431
432VALUE
433rb_f_sprintf(int argc, const VALUE *argv)
434{
435    return rb_str_format(argc - 1, argv + 1, GETNTHARG(0));
436}
437
438VALUE
439rb_str_format(int argc, const VALUE *argv, VALUE fmt)
440{
441    rb_encoding *enc;
442    const char *p, *end;
443    char *buf;
444    long blen, bsiz;
445    VALUE result;
446
447    long scanned = 0;
448    int coderange = ENC_CODERANGE_7BIT;
449    int width, prec, flags = FNONE;
450    int nextarg = 1;
451    int posarg = 0;
452    int tainted = 0;
453    VALUE nextvalue;
454    VALUE tmp;
455    VALUE str;
456    volatile VALUE hash = Qundef;
457
458#define CHECK_FOR_WIDTH(f)				 \
459    if ((f) & FWIDTH) {					 \
460	rb_raise(rb_eArgError, "width given twice");	 \
461    }							 \
462    if ((f) & FPREC0) {					 \
463	rb_raise(rb_eArgError, "width after precision"); \
464    }
465#define CHECK_FOR_FLAGS(f)				 \
466    if ((f) & FWIDTH) {					 \
467	rb_raise(rb_eArgError, "flag after width");	 \
468    }							 \
469    if ((f) & FPREC0) {					 \
470	rb_raise(rb_eArgError, "flag after precision"); \
471    }
472
473    ++argc;
474    --argv;
475    if (OBJ_TAINTED(fmt)) tainted = 1;
476    StringValue(fmt);
477    enc = rb_enc_get(fmt);
478    fmt = rb_str_new4(fmt);
479    p = RSTRING_PTR(fmt);
480    end = p + RSTRING_LEN(fmt);
481    blen = 0;
482    bsiz = 120;
483    result = rb_str_buf_new(bsiz);
484    rb_enc_copy(result, fmt);
485    buf = RSTRING_PTR(result);
486    memset(buf, 0, bsiz);
487    ENC_CODERANGE_SET(result, coderange);
488
489    for (; p < end; p++) {
490	const char *t;
491	int n;
492	ID id = 0;
493
494	for (t = p; t < end && *t != '%'; t++) ;
495	PUSH(p, t - p);
496	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
497	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
498	    ENC_CODERANGE_SET(result, coderange);
499	}
500	if (t >= end) {
501	    /* end of fmt string */
502	    goto sprint_exit;
503	}
504	p = t + 1;		/* skip `%' */
505
506	width = prec = -1;
507	nextvalue = Qundef;
508      retry:
509	switch (*p) {
510	  default:
511	    if (rb_enc_isprint(*p, enc))
512		rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
513	    else
514		rb_raise(rb_eArgError, "malformed format string");
515	    break;
516
517	  case ' ':
518	    CHECK_FOR_FLAGS(flags);
519	    flags |= FSPACE;
520	    p++;
521	    goto retry;
522
523	  case '#':
524	    CHECK_FOR_FLAGS(flags);
525	    flags |= FSHARP;
526	    p++;
527	    goto retry;
528
529	  case '+':
530	    CHECK_FOR_FLAGS(flags);
531	    flags |= FPLUS;
532	    p++;
533	    goto retry;
534
535	  case '-':
536	    CHECK_FOR_FLAGS(flags);
537	    flags |= FMINUS;
538	    p++;
539	    goto retry;
540
541	  case '0':
542	    CHECK_FOR_FLAGS(flags);
543	    flags |= FZERO;
544	    p++;
545	    goto retry;
546
547	  case '1': case '2': case '3': case '4':
548	  case '5': case '6': case '7': case '8': case '9':
549	    n = 0;
550	    GETNUM(n, width);
551	    if (*p == '$') {
552		if (nextvalue != Qundef) {
553		    rb_raise(rb_eArgError, "value given twice - %d$", n);
554		}
555		nextvalue = GETPOSARG(n);
556		p++;
557		goto retry;
558	    }
559	    CHECK_FOR_WIDTH(flags);
560	    width = n;
561	    flags |= FWIDTH;
562	    goto retry;
563
564	  case '<':
565	  case '{':
566	    {
567		const char *start = p;
568		char term = (*p == '<') ? '>' : '}';
569		int len;
570
571		for (; p < end && *p != term; ) {
572		    p += rb_enc_mbclen(p, end, enc);
573		}
574		if (p >= end) {
575		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
576		}
577#if SIZEOF_INT < SIZEOF_SIZE_T
578		if ((size_t)(p - start) >= INT_MAX) {
579		    const int message_limit = 20;
580		    len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start);
581		    rb_enc_raise(enc, rb_eArgError,
582				 "too long name (%"PRIdSIZE" bytes) - %.*s...%c",
583				 (size_t)(p - start - 2), len, start, term);
584		}
585#endif
586		len = (int)(p - start + 1); /* including parenthesis */
587		if (id) {
588		    rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>",
589				 len, start, rb_id2name(id));
590		}
591		nextvalue = GETNAMEARG((id = rb_check_id_cstr(start + 1,
592							      len - 2 /* without parenthesis */,
593							      enc),
594					ID2SYM(id)),
595				       start, len, enc);
596		if (nextvalue == Qundef) {
597		    rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start);
598		}
599		if (term == '}') goto format_s;
600		p++;
601		goto retry;
602	    }
603
604	  case '*':
605	    CHECK_FOR_WIDTH(flags);
606	    flags |= FWIDTH;
607	    GETASTER(width);
608	    if (width < 0) {
609		flags |= FMINUS;
610		width = -width;
611	    }
612	    p++;
613	    goto retry;
614
615	  case '.':
616	    if (flags & FPREC0) {
617		rb_raise(rb_eArgError, "precision given twice");
618	    }
619	    flags |= FPREC|FPREC0;
620
621	    prec = 0;
622	    p++;
623	    if (*p == '*') {
624		GETASTER(prec);
625		if (prec < 0) {	/* ignore negative precision */
626		    flags &= ~FPREC;
627		}
628		p++;
629		goto retry;
630	    }
631
632	    GETNUM(prec, precision);
633	    goto retry;
634
635	  case '\n':
636	  case '\0':
637	    p--;
638	  case '%':
639	    if (flags != FNONE) {
640		rb_raise(rb_eArgError, "invalid format character - %%");
641	    }
642	    PUSH("%", 1);
643	    break;
644
645	  case 'c':
646	    {
647		VALUE val = GETARG();
648		VALUE tmp;
649		unsigned int c;
650		int n;
651
652		tmp = rb_check_string_type(val);
653		if (!NIL_P(tmp)) {
654		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
655			rb_raise(rb_eArgError, "%%c requires a character");
656		    }
657		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
658		    RB_GC_GUARD(tmp);
659		}
660		else {
661		    c = NUM2INT(val);
662		    n = rb_enc_codelen(c, enc);
663		}
664		if (n <= 0) {
665		    rb_raise(rb_eArgError, "invalid character");
666		}
667		if (!(flags & FWIDTH)) {
668		    CHECK(n);
669		    rb_enc_mbcput(c, &buf[blen], enc);
670		    blen += n;
671		}
672		else if ((flags & FMINUS)) {
673		    CHECK(n);
674		    rb_enc_mbcput(c, &buf[blen], enc);
675		    blen += n;
676		    FILL(' ', width-1);
677		}
678		else {
679		    FILL(' ', width-1);
680		    CHECK(n);
681		    rb_enc_mbcput(c, &buf[blen], enc);
682		    blen += n;
683		}
684	    }
685	    break;
686
687	  case 's':
688	  case 'p':
689	  format_s:
690	    {
691		VALUE arg = GETARG();
692		long len, slen;
693
694		if (*p == 'p') arg = rb_inspect(arg);
695		str = rb_obj_as_string(arg);
696		if (OBJ_TAINTED(str)) tainted = 1;
697		len = RSTRING_LEN(str);
698		rb_str_set_len(result, blen);
699		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
700		    int cr = coderange;
701		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
702		    ENC_CODERANGE_SET(result,
703				      (cr == ENC_CODERANGE_UNKNOWN ?
704				       ENC_CODERANGE_BROKEN : (coderange = cr)));
705		}
706		enc = rb_enc_check(result, str);
707		if (flags&(FPREC|FWIDTH)) {
708		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
709		    if (slen < 0) {
710			rb_raise(rb_eArgError, "invalid mbstring sequence");
711		    }
712		    if ((flags&FPREC) && (prec < slen)) {
713			char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
714					     prec, enc);
715			slen = prec;
716			len = p - RSTRING_PTR(str);
717		    }
718		    /* need to adjust multi-byte string pos */
719		    if ((flags&FWIDTH) && (width > slen)) {
720			width -= (int)slen;
721			if (!(flags&FMINUS)) {
722			    CHECK(width);
723			    while (width--) {
724				buf[blen++] = ' ';
725			    }
726			}
727			CHECK(len);
728			memcpy(&buf[blen], RSTRING_PTR(str), len);
729			RB_GC_GUARD(str);
730			blen += len;
731			if (flags&FMINUS) {
732			    CHECK(width);
733			    while (width--) {
734				buf[blen++] = ' ';
735			    }
736			}
737			rb_enc_associate(result, enc);
738			break;
739		    }
740		}
741		PUSH(RSTRING_PTR(str), len);
742		RB_GC_GUARD(str);
743		rb_enc_associate(result, enc);
744	    }
745	    break;
746
747	  case 'd':
748	  case 'i':
749	  case 'o':
750	  case 'x':
751	  case 'X':
752	  case 'b':
753	  case 'B':
754	  case 'u':
755	    {
756		volatile VALUE val = GETARG();
757		char fbuf[32], nbuf[64], *s;
758		const char *prefix = 0;
759		int sign = 0, dots = 0;
760		char sc = 0;
761		long v = 0;
762		int base, bignum = 0;
763		int len;
764
765		switch (*p) {
766		  case 'd':
767		  case 'i':
768		  case 'u':
769		    sign = 1; break;
770		  case 'o':
771		  case 'x':
772		  case 'X':
773		  case 'b':
774		  case 'B':
775		    if (flags&(FPLUS|FSPACE)) sign = 1;
776		    break;
777		}
778		if (flags & FSHARP) {
779		    switch (*p) {
780		      case 'o':
781			prefix = "0"; break;
782		      case 'x':
783			prefix = "0x"; break;
784		      case 'X':
785			prefix = "0X"; break;
786		      case 'b':
787			prefix = "0b"; break;
788		      case 'B':
789			prefix = "0B"; break;
790		    }
791		}
792
793	      bin_retry:
794		switch (TYPE(val)) {
795		  case T_FLOAT:
796		    if (FIXABLE(RFLOAT_VALUE(val))) {
797			val = LONG2FIX((long)RFLOAT_VALUE(val));
798			goto bin_retry;
799		    }
800		    val = rb_dbl2big(RFLOAT_VALUE(val));
801		    if (FIXNUM_P(val)) goto bin_retry;
802		    bignum = 1;
803		    break;
804		  case T_STRING:
805		    val = rb_str_to_inum(val, 0, TRUE);
806		    goto bin_retry;
807		  case T_BIGNUM:
808		    bignum = 1;
809		    break;
810		  case T_FIXNUM:
811		    v = FIX2LONG(val);
812		    break;
813		  default:
814		    val = rb_Integer(val);
815		    goto bin_retry;
816		}
817
818		switch (*p) {
819		  case 'o':
820		    base = 8; break;
821		  case 'x':
822		  case 'X':
823		    base = 16; break;
824		  case 'b':
825		  case 'B':
826		    base = 2; break;
827		  case 'u':
828		  case 'd':
829		  case 'i':
830		  default:
831		    base = 10; break;
832		}
833
834		if (!bignum) {
835		    if (base == 2) {
836			val = rb_int2big(v);
837			goto bin_retry;
838		    }
839		    if (sign) {
840			char c = *p;
841			if (c == 'i') c = 'd'; /* %d and %i are identical */
842			if (v < 0) {
843			    v = -v;
844			    sc = '-';
845			    width--;
846			}
847			else if (flags & FPLUS) {
848			    sc = '+';
849			    width--;
850			}
851			else if (flags & FSPACE) {
852			    sc = ' ';
853			    width--;
854			}
855			snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
856			snprintf(nbuf, sizeof(nbuf), fbuf, v);
857			s = nbuf;
858		    }
859		    else {
860			s = nbuf;
861			if (v < 0) {
862			    dots = 1;
863			}
864			snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p);
865			snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
866			if (v < 0) {
867			    char d = 0;
868
869			    s = remove_sign_bits(s, base);
870			    switch (base) {
871			      case 16:
872				d = 'f'; break;
873			      case 8:
874				d = '7'; break;
875			    }
876			    if (d && *s != d) {
877				*--s = d;
878			    }
879			}
880		    }
881		    len = (int)strlen(s);
882		}
883		else {
884		    if (sign) {
885			tmp = rb_big2str(val, base);
886			s = RSTRING_PTR(tmp);
887			if (s[0] == '-') {
888			    s++;
889			    sc = '-';
890			    width--;
891			}
892			else if (flags & FPLUS) {
893			    sc = '+';
894			    width--;
895			}
896			else if (flags & FSPACE) {
897			    sc = ' ';
898			    width--;
899			}
900		    }
901		    else {
902			if (!RBIGNUM_SIGN(val)) {
903			    val = rb_big_clone(val);
904			    rb_big_2comp(val);
905			}
906			tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
907			s = RSTRING_PTR(tmp);
908			if (*s == '-') {
909			    dots = 1;
910			    if (base == 10) {
911				rb_warning("negative number for %%u specifier");
912			    }
913			    s = remove_sign_bits(++s, base);
914			    switch (base) {
915			      case 16:
916				if (s[0] != 'f') *--s = 'f'; break;
917			      case 8:
918				if (s[0] != '7') *--s = '7'; break;
919			      case 2:
920				if (s[0] != '1') *--s = '1'; break;
921			    }
922			}
923		    }
924		    len = rb_long2int(RSTRING_END(tmp) - s);
925		}
926
927		if (dots) {
928		    prec -= 2;
929		    width -= 2;
930		}
931
932		if (*p == 'X') {
933		    char *pp = s;
934		    int c;
935		    while ((c = (int)(unsigned char)*pp) != 0) {
936			*pp = rb_enc_toupper(c, enc);
937			pp++;
938		    }
939		}
940		if (prefix && !prefix[1]) { /* octal */
941		    if (dots) {
942			prefix = 0;
943		    }
944		    else if (len == 1 && *s == '0') {
945			len = 0;
946			if (flags & FPREC) prec--;
947		    }
948		    else if ((flags & FPREC) && (prec > len)) {
949			prefix = 0;
950		    }
951		}
952		else if (len == 1 && *s == '0') {
953		    prefix = 0;
954		}
955		if (prefix) {
956		    width -= (int)strlen(prefix);
957		}
958		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
959		    prec = width;
960		    width = 0;
961		}
962		else {
963		    if (prec < len) {
964			if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
965			prec = len;
966		    }
967		    width -= prec;
968		}
969		if (!(flags&FMINUS)) {
970		    CHECK(width);
971		    while (width-- > 0) {
972			buf[blen++] = ' ';
973		    }
974		}
975		if (sc) PUSH(&sc, 1);
976		if (prefix) {
977		    int plen = (int)strlen(prefix);
978		    PUSH(prefix, plen);
979		}
980		CHECK(prec - len);
981		if (dots) PUSH("..", 2);
982		if (!bignum && v < 0) {
983		    char c = sign_bits(base, p);
984		    while (len < prec--) {
985			buf[blen++] = c;
986		    }
987		}
988		else if ((flags & (FMINUS|FPREC)) != FMINUS) {
989		    char c;
990
991		    if (!sign && bignum && !RBIGNUM_SIGN(val))
992			c = sign_bits(base, p);
993		    else
994			c = '0';
995		    while (len < prec--) {
996			buf[blen++] = c;
997		    }
998		}
999		PUSH(s, len);
1000		RB_GC_GUARD(tmp);
1001		CHECK(width);
1002		while (width-- > 0) {
1003		    buf[blen++] = ' ';
1004		}
1005	    }
1006	    break;
1007
1008	  case 'f':
1009	  case 'g':
1010	  case 'G':
1011	  case 'e':
1012	  case 'E':
1013	  case 'a':
1014	  case 'A':
1015	    {
1016		VALUE val = GETARG();
1017		double fval;
1018		int i, need = 6;
1019		char fbuf[32];
1020
1021		fval = RFLOAT_VALUE(rb_Float(val));
1022		if (isnan(fval) || isinf(fval)) {
1023		    const char *expr;
1024
1025		    if (isnan(fval)) {
1026			expr = "NaN";
1027		    }
1028		    else {
1029			expr = "Inf";
1030		    }
1031		    need = (int)strlen(expr);
1032		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
1033			need++;
1034		    if ((flags & FWIDTH) && need < width)
1035			need = width;
1036
1037		    CHECK(need + 1);
1038		    snprintf(&buf[blen], need + 1, "%*s", need, "");
1039		    if (flags & FMINUS) {
1040			if (!isnan(fval) && fval < 0.0)
1041			    buf[blen++] = '-';
1042			else if (flags & FPLUS)
1043			    buf[blen++] = '+';
1044			else if (flags & FSPACE)
1045			    blen++;
1046			memcpy(&buf[blen], expr, strlen(expr));
1047		    }
1048		    else {
1049			if (!isnan(fval) && fval < 0.0)
1050			    buf[blen + need - strlen(expr) - 1] = '-';
1051			else if (flags & FPLUS)
1052			    buf[blen + need - strlen(expr) - 1] = '+';
1053			else if ((flags & FSPACE) && need > width)
1054			    blen++;
1055			memcpy(&buf[blen + need - strlen(expr)], expr,
1056			       strlen(expr));
1057		    }
1058		    blen += strlen(&buf[blen]);
1059		    break;
1060		}
1061
1062		fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
1063		need = 0;
1064		if (*p != 'e' && *p != 'E') {
1065		    i = INT_MIN;
1066		    frexp(fval, &i);
1067		    if (i > 0)
1068			need = BIT_DIGITS(i);
1069		}
1070		need += (flags&FPREC) ? prec : 6;
1071		if ((flags&FWIDTH) && need < width)
1072		    need = width;
1073		need += 20;
1074
1075		CHECK(need);
1076		snprintf(&buf[blen], need, fbuf, fval);
1077		blen += strlen(&buf[blen]);
1078	    }
1079	    break;
1080	}
1081	flags = FNONE;
1082    }
1083
1084  sprint_exit:
1085    RB_GC_GUARD(fmt);
1086    /* XXX - We cannot validate the number of arguments if (digit)$ style used.
1087     */
1088    if (posarg >= 0 && nextarg < argc) {
1089	const char *mesg = "too many arguments for format string";
1090	if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
1091	if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
1092    }
1093    rb_str_resize(result, blen);
1094
1095    if (tainted) OBJ_TAINT(result);
1096    return result;
1097}
1098
1099static void
1100fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec)
1101{
1102    char *end = buf + size;
1103    *buf++ = '%';
1104    if (flags & FSHARP) *buf++ = '#';
1105    if (flags & FPLUS)  *buf++ = '+';
1106    if (flags & FMINUS) *buf++ = '-';
1107    if (flags & FZERO)  *buf++ = '0';
1108    if (flags & FSPACE) *buf++ = ' ';
1109
1110    if (flags & FWIDTH) {
1111	snprintf(buf, end - buf, "%d", width);
1112	buf += strlen(buf);
1113    }
1114
1115    if (flags & FPREC) {
1116	snprintf(buf, end - buf, ".%d", prec);
1117	buf += strlen(buf);
1118    }
1119
1120    *buf++ = c;
1121    *buf = '\0';
1122}
1123
/*
 * Configuration for the printf implementation that is textually included
 * from "vsnprintf.c" at the end of this block.  The macros below are set
 * up before the include; NOTE(review): how each one is consumed is decided
 * by vsnprintf.c, which is not visible from here -- confirm there.
 */

/* Make the name FILE refer to rb_printf_buffer from here on. */
#undef FILE
#define FILE rb_printf_buffer
/* Map the __sbuf / __sFILE names onto rb_printf_-prefixed ones. */
#define __sbuf rb_printf_sbuf
#define __sFILE rb_printf_sfile
/* Drop any macro definitions of these stdio names before the include. */
#undef feof
#undef ferror
#undef clearerr
#undef fileno
/*
 * Select quad_t / u_quad_t:
 *  - long narrower than a pointer and LONG_LONG exactly pointer-sized:
 *    define _HAVE_SANE_QUAD_ and _HAVE_LLP64_, quad_t is LONG_LONG;
 *  - otherwise, long and LONG_LONG differ in size and LONG_LONG is 8 bytes:
 *    define _HAVE_SANE_QUAD_, quad_t is LONG_LONG;
 *  - in every other case none of these names is defined here.
 */
#if SIZEOF_LONG < SIZEOF_VOIDP
# if  SIZEOF_LONG_LONG == SIZEOF_VOIDP
#  define _HAVE_SANE_QUAD_
#  define _HAVE_LLP64_
#  define quad_t LONG_LONG
#  define u_quad_t unsigned LONG_LONG
# endif
#elif SIZEOF_LONG != SIZEOF_LONG_LONG && SIZEOF_LONG_LONG == 8
# define _HAVE_SANE_QUAD_
# define quad_t LONG_LONG
# define u_quad_t unsigned LONG_LONG
#endif
/* presumably turns on floating-point conversions in vsnprintf.c -- confirm */
#define FLOATING_POINT 1
/* Route the BSD__dtoa / BSD__hdtoa names to ruby_dtoa / ruby_hdtoa. */
#define BSD__dtoa ruby_dtoa
#define BSD__hdtoa ruby_hdtoa
#include "vsnprintf.c"
1148
/*
 * Output state handed to BSD_vfprintf() by rb_enc_vsprintf() and
 * rb_str_vcatf(): the plain printf buffer plus one extra VALUE slot.
 */
typedef struct {
    /* Must remain the first member: ruby__sfvextra() receives a pointer to
     * this field and casts it back to rb_printf_buffer_extra. */
    rb_printf_buffer base;
    /* Set to 0 before formatting; ruby__sfvextra() stores the string it
     * produced here.  NOTE(review): presumably volatile so the reference
     * is kept in memory where the GC can see it -- confirm. */
    volatile VALUE value;
} rb_printf_buffer_extra;
1153
1154static int
1155ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio)
1156{
1157    struct __siov *iov;
1158    VALUE result = (VALUE)fp->_bf._base;
1159    char *buf = (char*)fp->_p;
1160    size_t len, n;
1161    size_t blen = buf - RSTRING_PTR(result), bsiz = fp->_w;
1162
1163    if (RBASIC(result)->klass) {
1164	rb_raise(rb_eRuntimeError, "rb_vsprintf reentered");
1165    }
1166    if ((len = uio->uio_resid) == 0)
1167	return 0;
1168    CHECK(len);
1169    buf += blen;
1170    fp->_w = bsiz;
1171    for (iov = uio->uio_iov; len > 0; ++iov) {
1172	MEMCPY(buf, iov->iov_base, char, n = iov->iov_len);
1173	buf += n;
1174	len -= n;
1175    }
1176    fp->_p = (unsigned char *)buf;
1177    rb_str_set_len(result, buf - RSTRING_PTR(result));
1178    return 0;
1179}
1180
1181static char *
1182ruby__sfvextra(rb_printf_buffer *fp, size_t valsize, void *valp, long *sz, int sign)
1183{
1184    VALUE value, result = (VALUE)fp->_bf._base;
1185    rb_encoding *enc;
1186    char *cp;
1187
1188    if (valsize != sizeof(VALUE)) return 0;
1189    value = *(VALUE *)valp;
1190    if (RBASIC(result)->klass) {
1191	rb_raise(rb_eRuntimeError, "rb_vsprintf reentered");
1192    }
1193    if (sign == '+') {
1194	value = rb_inspect(value);
1195    }
1196    else {
1197	value = rb_obj_as_string(value);
1198    }
1199    enc = rb_enc_compatible(result, value);
1200    if (enc) {
1201	rb_enc_associate(result, enc);
1202    }
1203    else {
1204	enc = rb_enc_get(result);
1205	value = rb_str_conv_enc_opts(value, rb_enc_get(value), enc,
1206				     ECONV_UNDEF_REPLACE|ECONV_INVALID_REPLACE,
1207				     Qnil);
1208	*(volatile VALUE *)valp = value;
1209    }
1210    StringValueCStr(value);
1211    RSTRING_GETMEM(value, cp, *sz);
1212    ((rb_printf_buffer_extra *)fp)->value = value;
1213    OBJ_INFECT(result, value);
1214    return cp;
1215}
1216
1217VALUE
1218rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap)
1219{
1220    rb_printf_buffer_extra buffer;
1221#define f buffer.base
1222    VALUE result;
1223
1224    f._flags = __SWR | __SSTR;
1225    f._bf._size = 0;
1226    f._w = 120;
1227    result = rb_str_buf_new(f._w);
1228    if (enc) {
1229	if (rb_enc_mbminlen(enc) > 1) {
1230	    /* the implementation deeply depends on plain char */
1231	    rb_raise(rb_eArgError, "cannot construct wchar_t based encoding string: %s",
1232		     rb_enc_name(enc));
1233	}
1234	rb_enc_associate(result, enc);
1235    }
1236    f._bf._base = (unsigned char *)result;
1237    f._p = (unsigned char *)RSTRING_PTR(result);
1238    RBASIC(result)->klass = 0;
1239    f.vwrite = ruby__sfvwrite;
1240    f.vextra = ruby__sfvextra;
1241    buffer.value = 0;
1242    BSD_vfprintf(&f, fmt, ap);
1243    RBASIC(result)->klass = rb_cString;
1244    rb_str_resize(result, (char *)f._p - RSTRING_PTR(result));
1245#undef f
1246
1247    return result;
1248}
1249
1250VALUE
1251rb_enc_sprintf(rb_encoding *enc, const char *format, ...)
1252{
1253    VALUE result;
1254    va_list ap;
1255
1256    va_start(ap, format);
1257    result = rb_enc_vsprintf(enc, format, ap);
1258    va_end(ap);
1259
1260    return result;
1261}
1262
1263VALUE
1264rb_vsprintf(const char *fmt, va_list ap)
1265{
1266    return rb_enc_vsprintf(NULL, fmt, ap);
1267}
1268
1269VALUE
1270rb_sprintf(const char *format, ...)
1271{
1272    VALUE result;
1273    va_list ap;
1274
1275    va_start(ap, format);
1276    result = rb_vsprintf(format, ap);
1277    va_end(ap);
1278
1279    return result;
1280}
1281
1282VALUE
1283rb_str_vcatf(VALUE str, const char *fmt, va_list ap)
1284{
1285    rb_printf_buffer_extra buffer;
1286#define f buffer.base
1287    VALUE klass;
1288
1289    StringValue(str);
1290    rb_str_modify(str);
1291    f._flags = __SWR | __SSTR;
1292    f._bf._size = 0;
1293    f._w = rb_str_capacity(str);
1294    f._bf._base = (unsigned char *)str;
1295    f._p = (unsigned char *)RSTRING_END(str);
1296    klass = RBASIC(str)->klass;
1297    RBASIC(str)->klass = 0;
1298    f.vwrite = ruby__sfvwrite;
1299    f.vextra = ruby__sfvextra;
1300    buffer.value = 0;
1301    BSD_vfprintf(&f, fmt, ap);
1302    RBASIC(str)->klass = klass;
1303    rb_str_resize(str, (char *)f._p - RSTRING_PTR(str));
1304#undef f
1305
1306    return str;
1307}
1308
1309VALUE
1310rb_str_catf(VALUE str, const char *format, ...)
1311{
1312    va_list ap;
1313
1314    va_start(ap, format);
1315    str = rb_str_vcatf(str, format, ap);
1316    va_end(ap);
1317
1318    return str;
1319}
1320