1/********************************************************************** 2 3 sprintf.c - 4 5 $Author: nagachika $ 6 created at: Fri Oct 15 10:39:26 JST 1993 7 8 Copyright (C) 1993-2007 Yukihiro Matsumoto 9 Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 10 Copyright (C) 2000 Information-technology Promotion Agency, Japan 11 12**********************************************************************/ 13 14#include "ruby/ruby.h" 15#include "ruby/re.h" 16#include "ruby/encoding.h" 17#include <math.h> 18#include <stdarg.h> 19 20#ifdef HAVE_IEEEFP_H 21#include <ieeefp.h> 22#endif 23 24#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log2(10) =~ 146/485 */ 25#define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT) 26#define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n))) 27 28static void fmt_setup(char*,size_t,int,int,int,int); 29 30static char* 31remove_sign_bits(char *str, int base) 32{ 33 char *t = str; 34 35 if (base == 16) { 36 while (*t == 'f') { 37 t++; 38 } 39 } 40 else if (base == 8) { 41 *t |= EXTENDSIGN(3, strlen(t)); 42 while (*t == '7') { 43 t++; 44 } 45 } 46 else if (base == 2) { 47 while (*t == '1') { 48 t++; 49 } 50 } 51 52 return t; 53} 54 55static char 56sign_bits(int base, const char *p) 57{ 58 char c = '.'; 59 60 switch (base) { 61 case 16: 62 if (*p == 'X') c = 'F'; 63 else c = 'f'; 64 break; 65 case 8: 66 c = '7'; break; 67 case 2: 68 c = '1'; break; 69 } 70 return c; 71} 72 73#define FNONE 0 74#define FSHARP 1 75#define FMINUS 2 76#define FPLUS 4 77#define FZERO 8 78#define FSPACE 16 79#define FWIDTH 32 80#define FPREC 64 81#define FPREC0 128 82 83#define CHECK(l) do {\ 84 int cr = ENC_CODERANGE(result);\ 85 while (blen + (l) >= bsiz) {\ 86 bsiz*=2;\ 87 }\ 88 rb_str_resize(result, bsiz);\ 89 ENC_CODERANGE_SET(result, cr);\ 90 buf = RSTRING_PTR(result);\ 91} while (0) 92 93#define PUSH(s, l) do { \ 94 CHECK(l);\ 95 memcpy(&buf[blen], (s), (l));\ 96 blen += (l);\ 97} while (0) 98 99#define FILL(c, l) do { \ 100 CHECK(l);\ 101 memset(&buf[blen], (c), (l));\ 102 blen += (l);\ 103} while (0) 104 105#define GETARG() (nextvalue != Qundef ? nextvalue : \ 106 posarg == -1 ? \ 107 (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \ 108 posarg == -2 ? \ 109 (rb_raise(rb_eArgError, "unnumbered(%d) mixed with named", nextarg), 0) : \ 110 (posarg = nextarg++, GETNTHARG(posarg))) 111 112#define GETPOSARG(n) (posarg > 0 ? \ 113 (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", (n), posarg), 0) : \ 114 posarg == -2 ? \ 115 (rb_raise(rb_eArgError, "numbered(%d) after named", (n)), 0) : \ 116 (((n) < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", (n)), 0) : \ 117 (posarg = -1, GETNTHARG(n)))) 118 119#define GETNTHARG(nth) \ 120 (((nth) >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[(nth)]) 121 122#define GETNAMEARG(id, name, len, enc) ( \ 123 posarg > 0 ? \ 124 (rb_enc_raise((enc), rb_eArgError, "named%.*s after unnumbered(%d)", (len), (name), posarg), 0) : \ 125 posarg == -1 ? \ 126 (rb_enc_raise((enc), rb_eArgError, "named%.*s after numbered", (len), (name)), 0) : \ 127 (posarg = -2, rb_hash_lookup2(get_hash(&hash, argc, argv), (id), Qundef))) 128 129#define GETNUM(n, val) \ 130 for (; p < end && rb_enc_isdigit(*p, enc); p++) { \ 131 int next_n = 10 * (n) + (*p - '0'); \ 132 if (next_n / 10 != (n)) {\ 133 rb_raise(rb_eArgError, #val " too big"); \ 134 } \ 135 (n) = next_n; \ 136 } \ 137 if (p >= end) { \ 138 rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \ 139 } 140 141#define GETASTER(val) do { \ 142 t = p++; \ 143 n = 0; \ 144 GETNUM(n, (val)); \ 145 if (*p == '$') { \ 146 tmp = GETPOSARG(n); \ 147 } \ 148 else { \ 149 tmp = GETARG(); \ 150 p = t; \ 151 } \ 152 (val) = NUM2INT(tmp); \ 153} while (0) 154 155static VALUE 156get_hash(volatile VALUE *hash, int argc, const VALUE *argv) 157{ 158 VALUE tmp; 159 160 if (*hash != Qundef) return *hash; 161 if (argc != 2) { 162 rb_raise(rb_eArgError, "one hash required"); 163 } 164 tmp = rb_check_hash_type(argv[1]); 165 if (NIL_P(tmp)) { 166 rb_raise(rb_eArgError, "one hash required"); 167 } 168 return (*hash = tmp); 169} 170 171/* 172 * call-seq: 173 * format(format_string [, arguments...] ) -> string 174 * sprintf(format_string [, arguments...] ) -> string 175 * 176 * Returns the string resulting from applying <i>format_string</i> to 177 * any additional arguments. Within the format string, any characters 178 * other than format sequences are copied to the result. 179 * 180 * The syntax of a format sequence is follows. 181 * 182 * %[flags][width][.precision]type 183 * 184 * A format 185 * sequence consists of a percent sign, followed by optional flags, 186 * width, and precision indicators, then terminated with a field type 187 * character. The field type controls how the corresponding 188 * <code>sprintf</code> argument is to be interpreted, while the flags 189 * modify that interpretation. 190 * 191 * The field type characters are: 192 * 193 * Field | Integer Format 194 * ------+-------------------------------------------------------------- 195 * b | Convert argument as a binary number. 196 * | Negative numbers will be displayed as a two's complement 197 * | prefixed with `..1'. 198 * B | Equivalent to `b', but uses an uppercase 0B for prefix 199 * | in the alternative format by #. 200 * d | Convert argument as a decimal number. 201 * i | Identical to `d'. 202 * o | Convert argument as an octal number. 203 * | Negative numbers will be displayed as a two's complement 204 * | prefixed with `..7'. 205 * u | Identical to `d'. 206 * x | Convert argument as a hexadecimal number. 207 * | Negative numbers will be displayed as a two's complement 208 * | prefixed with `..f' (representing an infinite string of 209 * | leading 'ff's). 210 * X | Equivalent to `x', but uses uppercase letters. 211 * 212 * Field | Float Format 213 * ------+-------------------------------------------------------------- 214 * e | Convert floating point argument into exponential notation 215 * | with one digit before the decimal point as [-]d.dddddde[+-]dd. 216 * | The precision specifies the number of digits after the decimal 217 * | point (defaulting to six). 218 * E | Equivalent to `e', but uses an uppercase E to indicate 219 * | the exponent. 220 * f | Convert floating point argument as [-]ddd.dddddd, 221 * | where the precision specifies the number of digits after 222 * | the decimal point. 223 * g | Convert a floating point number using exponential form 224 * | if the exponent is less than -4 or greater than or 225 * | equal to the precision, or in dd.dddd form otherwise. 226 * | The precision specifies the number of significant digits. 227 * G | Equivalent to `g', but use an uppercase `E' in exponent form. 228 * a | Convert floating point argument as [-]0xh.hhhhp[+-]dd, 229 * | which is consisted from optional sign, "0x", fraction part 230 * | as hexadecimal, "p", and exponential part as decimal. 231 * A | Equivalent to `a', but use uppercase `X' and `P'. 232 * 233 * Field | Other Format 234 * ------+-------------------------------------------------------------- 235 * c | Argument is the numeric code for a single character or 236 * | a single character string itself. 237 * p | The valuing of argument.inspect. 238 * s | Argument is a string to be substituted. If the format 239 * | sequence contains a precision, at most that many characters 240 * | will be copied. 241 * % | A percent sign itself will be displayed. No argument taken. 242 * 243 * The flags modifies the behavior of the formats. 244 * The flag characters are: 245 * 246 * Flag | Applies to | Meaning 247 * ---------+---------------+----------------------------------------- 248 * space | bBdiouxX | Leave a space at the start of 249 * | aAeEfgG | non-negative numbers. 250 * | (numeric fmt) | For `o', `x', `X', `b' and `B', use 251 * | | a minus sign with absolute value for 252 * | | negative values. 253 * ---------+---------------+----------------------------------------- 254 * (digit)$ | all | Specifies the absolute argument number 255 * | | for this field. Absolute and relative 256 * | | argument numbers cannot be mixed in a 257 * | | sprintf string. 258 * ---------+---------------+----------------------------------------- 259 * # | bBoxX | Use an alternative format. 260 * | aAeEfgG | For the conversions `o', increase the precision 261 * | | until the first digit will be `0' if 262 * | | it is not formatted as complements. 263 * | | For the conversions `x', `X', `b' and `B' 264 * | | on non-zero, prefix the result with ``0x'', 265 * | | ``0X'', ``0b'' and ``0B'', respectively. 266 * | | For `a', `A', `e', `E', `f', `g', and 'G', 267 * | | force a decimal point to be added, 268 * | | even if no digits follow. 269 * | | For `g' and 'G', do not remove trailing zeros. 270 * ---------+---------------+----------------------------------------- 271 * + | bBdiouxX | Add a leading plus sign to non-negative 272 * | aAeEfgG | numbers. 273 * | (numeric fmt) | For `o', `x', `X', `b' and `B', use 274 * | | a minus sign with absolute value for 275 * | | negative values. 276 * ---------+---------------+----------------------------------------- 277 * - | all | Left-justify the result of this conversion. 278 * ---------+---------------+----------------------------------------- 279 * 0 (zero) | bBdiouxX | Pad with zeros, not spaces. 280 * | aAeEfgG | For `o', `x', `X', `b' and `B', radix-1 281 * | (numeric fmt) | is used for negative numbers formatted as 282 * | | complements. 283 * ---------+---------------+----------------------------------------- 284 * * | all | Use the next argument as the field width. 285 * | | If negative, left-justify the result. If the 286 * | | asterisk is followed by a number and a dollar 287 * | | sign, use the indicated argument as the width. 288 * 289 * Examples of flags: 290 * 291 * # `+' and space flag specifies the sign of non-negative numbers. 292 * sprintf("%d", 123) #=> "123" 293 * sprintf("%+d", 123) #=> "+123" 294 * sprintf("% d", 123) #=> " 123" 295 * 296 * # `#' flag for `o' increases number of digits to show `0'. 297 * # `+' and space flag changes format of negative numbers. 298 * sprintf("%o", 123) #=> "173" 299 * sprintf("%#o", 123) #=> "0173" 300 * sprintf("%+o", -123) #=> "-173" 301 * sprintf("%o", -123) #=> "..7605" 302 * sprintf("%#o", -123) #=> "..7605" 303 * 304 * # `#' flag for `x' add a prefix `0x' for non-zero numbers. 305 * # `+' and space flag disables complements for negative numbers. 306 * sprintf("%x", 123) #=> "7b" 307 * sprintf("%#x", 123) #=> "0x7b" 308 * sprintf("%+x", -123) #=> "-7b" 309 * sprintf("%x", -123) #=> "..f85" 310 * sprintf("%#x", -123) #=> "0x..f85" 311 * sprintf("%#x", 0) #=> "0" 312 * 313 * # `#' for `X' uses the prefix `0X'. 314 * sprintf("%X", 123) #=> "7B" 315 * sprintf("%#X", 123) #=> "0X7B" 316 * 317 * # `#' flag for `b' add a prefix `0b' for non-zero numbers. 318 * # `+' and space flag disables complements for negative numbers. 319 * sprintf("%b", 123) #=> "1111011" 320 * sprintf("%#b", 123) #=> "0b1111011" 321 * sprintf("%+b", -123) #=> "-1111011" 322 * sprintf("%b", -123) #=> "..10000101" 323 * sprintf("%#b", -123) #=> "0b..10000101" 324 * sprintf("%#b", 0) #=> "0" 325 * 326 * # `#' for `B' uses the prefix `0B'. 327 * sprintf("%B", 123) #=> "1111011" 328 * sprintf("%#B", 123) #=> "0B1111011" 329 * 330 * # `#' for `e' forces to show the decimal point. 331 * sprintf("%.0e", 1) #=> "1e+00" 332 * sprintf("%#.0e", 1) #=> "1.e+00" 333 * 334 * # `#' for `f' forces to show the decimal point. 335 * sprintf("%.0f", 1234) #=> "1234" 336 * sprintf("%#.0f", 1234) #=> "1234." 337 * 338 * # `#' for `g' forces to show the decimal point. 339 * # It also disables stripping lowest zeros. 340 * sprintf("%g", 123.4) #=> "123.4" 341 * sprintf("%#g", 123.4) #=> "123.400" 342 * sprintf("%g", 123456) #=> "123456" 343 * sprintf("%#g", 123456) #=> "123456." 344 * 345 * The field width is an optional integer, followed optionally by a 346 * period and a precision. The width specifies the minimum number of 347 * characters that will be written to the result for this field. 348 * 349 * Examples of width: 350 * 351 * # padding is done by spaces, width=20 352 * # 0 or radix-1. <------------------> 353 * sprintf("%20d", 123) #=> " 123" 354 * sprintf("%+20d", 123) #=> " +123" 355 * sprintf("%020d", 123) #=> "00000000000000000123" 356 * sprintf("%+020d", 123) #=> "+0000000000000000123" 357 * sprintf("% 020d", 123) #=> " 0000000000000000123" 358 * sprintf("%-20d", 123) #=> "123 " 359 * sprintf("%-+20d", 123) #=> "+123 " 360 * sprintf("%- 20d", 123) #=> " 123 " 361 * sprintf("%020x", -123) #=> "..ffffffffffffffff85" 362 * 363 * For 364 * numeric fields, the precision controls the number of decimal places 365 * displayed. For string fields, the precision determines the maximum 366 * number of characters to be copied from the string. (Thus, the format 367 * sequence <code>%10.10s</code> will always contribute exactly ten 368 * characters to the result.) 369 * 370 * Examples of precisions: 371 * 372 * # precision for `d', 'o', 'x' and 'b' is 373 * # minimum number of digits <------> 374 * sprintf("%20.8d", 123) #=> " 00000123" 375 * sprintf("%20.8o", 123) #=> " 00000173" 376 * sprintf("%20.8x", 123) #=> " 0000007b" 377 * sprintf("%20.8b", 123) #=> " 01111011" 378 * sprintf("%20.8d", -123) #=> " -00000123" 379 * sprintf("%20.8o", -123) #=> " ..777605" 380 * sprintf("%20.8x", -123) #=> " ..ffff85" 381 * sprintf("%20.8b", -11) #=> " ..110101" 382 * 383 * # "0x" and "0b" for `#x' and `#b' is not counted for 384 * # precision but "0" for `#o' is counted. <------> 385 * sprintf("%#20.8d", 123) #=> " 00000123" 386 * sprintf("%#20.8o", 123) #=> " 00000173" 387 * sprintf("%#20.8x", 123) #=> " 0x0000007b" 388 * sprintf("%#20.8b", 123) #=> " 0b01111011" 389 * sprintf("%#20.8d", -123) #=> " -00000123" 390 * sprintf("%#20.8o", -123) #=> " ..777605" 391 * sprintf("%#20.8x", -123) #=> " 0x..ffff85" 392 * sprintf("%#20.8b", -11) #=> " 0b..110101" 393 * 394 * # precision for `e' is number of 395 * # digits after the decimal point <------> 396 * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03" 397 * 398 * # precision for `f' is number of 399 * # digits after the decimal point <------> 400 * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000" 401 * 402 * # precision for `g' is number of 403 * # significant digits <-------> 404 * sprintf("%20.8g", 1234.56789) #=> " 1234.5679" 405 * 406 * # <-------> 407 * sprintf("%20.8g", 123456789) #=> " 1.2345679e+08" 408 * 409 * # precision for `s' is 410 * # maximum number of characters <------> 411 * sprintf("%20.8s", "string test") #=> " string t" 412 * 413 * Examples: 414 * 415 * sprintf("%d %04x", 123, 123) #=> "123 007b" 416 * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" 417 * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" 418 * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" 419 * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" 420 * sprintf("%u", -123) #=> "-123" 421 * 422 * For more complex formatting, Ruby supports a reference by name. 423 * %<name>s style uses format style, but %{name} style doesn't. 424 * 425 * Examples: 426 * sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 }) 427 * #=> 1 : 2.000000 428 * sprintf("%{foo}f", { :foo => 1 }) 429 * # => "1f" 430 */ 431 432VALUE 433rb_f_sprintf(int argc, const VALUE *argv) 434{ 435 return rb_str_format(argc - 1, argv + 1, GETNTHARG(0)); 436} 437 438VALUE 439rb_str_format(int argc, const VALUE *argv, VALUE fmt) 440{ 441 rb_encoding *enc; 442 const char *p, *end; 443 char *buf; 444 long blen, bsiz; 445 VALUE result; 446 447 long scanned = 0; 448 int coderange = ENC_CODERANGE_7BIT; 449 int width, prec, flags = FNONE; 450 int nextarg = 1; 451 int posarg = 0; 452 int tainted = 0; 453 VALUE nextvalue; 454 VALUE tmp; 455 VALUE str; 456 volatile VALUE hash = Qundef; 457 458#define CHECK_FOR_WIDTH(f) \ 459 if ((f) & FWIDTH) { \ 460 rb_raise(rb_eArgError, "width given twice"); \ 461 } \ 462 if ((f) & FPREC0) { \ 463 rb_raise(rb_eArgError, "width after precision"); \ 464 } 465#define CHECK_FOR_FLAGS(f) \ 466 if ((f) & FWIDTH) { \ 467 rb_raise(rb_eArgError, "flag after width"); \ 468 } \ 469 if ((f) & FPREC0) { \ 470 rb_raise(rb_eArgError, "flag after precision"); \ 471 } 472 473 ++argc; 474 --argv; 475 if (OBJ_TAINTED(fmt)) tainted = 1; 476 StringValue(fmt); 477 enc = rb_enc_get(fmt); 478 fmt = rb_str_new4(fmt); 479 p = RSTRING_PTR(fmt); 480 end = p + RSTRING_LEN(fmt); 481 blen = 0; 482 bsiz = 120; 483 result = rb_str_buf_new(bsiz); 484 rb_enc_copy(result, fmt); 485 buf = RSTRING_PTR(result); 486 memset(buf, 0, bsiz); 487 ENC_CODERANGE_SET(result, coderange); 488 489 for (; p < end; p++) { 490 const char *t; 491 int n; 492 ID id = 0; 493 494 for (t = p; t < end && *t != '%'; t++) ; 495 PUSH(p, t - p); 496 if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { 497 scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); 498 ENC_CODERANGE_SET(result, coderange); 499 } 500 if (t >= end) { 501 /* end of fmt string */ 502 goto sprint_exit; 503 } 504 p = t + 1; /* skip `%' */ 505 506 width = prec = -1; 507 nextvalue = Qundef; 508 retry: 509 switch (*p) { 510 default: 511 if (rb_enc_isprint(*p, enc)) 512 rb_raise(rb_eArgError, "malformed format string - %%%c", *p); 513 else 514 rb_raise(rb_eArgError, "malformed format string"); 515 break; 516 517 case ' ': 518 CHECK_FOR_FLAGS(flags); 519 flags |= FSPACE; 520 p++; 521 goto retry; 522 523 case '#': 524 CHECK_FOR_FLAGS(flags); 525 flags |= FSHARP; 526 p++; 527 goto retry; 528 529 case '+': 530 CHECK_FOR_FLAGS(flags); 531 flags |= FPLUS; 532 p++; 533 goto retry; 534 535 case '-': 536 CHECK_FOR_FLAGS(flags); 537 flags |= FMINUS; 538 p++; 539 goto retry; 540 541 case '0': 542 CHECK_FOR_FLAGS(flags); 543 flags |= FZERO; 544 p++; 545 goto retry; 546 547 case '1': case '2': case '3': case '4': 548 case '5': case '6': case '7': case '8': case '9': 549 n = 0; 550 GETNUM(n, width); 551 if (*p == '$') { 552 if (nextvalue != Qundef) { 553 rb_raise(rb_eArgError, "value given twice - %d$", n); 554 } 555 nextvalue = GETPOSARG(n); 556 p++; 557 goto retry; 558 } 559 CHECK_FOR_WIDTH(flags); 560 width = n; 561 flags |= FWIDTH; 562 goto retry; 563 564 case '<': 565 case '{': 566 { 567 const char *start = p; 568 char term = (*p == '<') ? '>' : '}'; 569 int len; 570 571 for (; p < end && *p != term; ) { 572 p += rb_enc_mbclen(p, end, enc); 573 } 574 if (p >= end) { 575 rb_raise(rb_eArgError, "malformed name - unmatched parenthesis"); 576 } 577#if SIZEOF_INT < SIZEOF_SIZE_T 578 if ((size_t)(p - start) >= INT_MAX) { 579 const int message_limit = 20; 580 len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start); 581 rb_enc_raise(enc, rb_eArgError, 582 "too long name (%"PRIdSIZE" bytes) - %.*s...%c", 583 (size_t)(p - start - 2), len, start, term); 584 } 585#endif 586 len = (int)(p - start + 1); /* including parenthesis */ 587 if (id) { 588 rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>", 589 len, start, rb_id2name(id)); 590 } 591 nextvalue = GETNAMEARG((id = rb_check_id_cstr(start + 1, 592 len - 2 /* without parenthesis */, 593 enc), 594 ID2SYM(id)), 595 start, len, enc); 596 if (nextvalue == Qundef) { 597 rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start); 598 } 599 if (term == '}') goto format_s; 600 p++; 601 goto retry; 602 } 603 604 case '*': 605 CHECK_FOR_WIDTH(flags); 606 flags |= FWIDTH; 607 GETASTER(width); 608 if (width < 0) { 609 flags |= FMINUS; 610 width = -width; 611 } 612 p++; 613 goto retry; 614 615 case '.': 616 if (flags & FPREC0) { 617 rb_raise(rb_eArgError, "precision given twice"); 618 } 619 flags |= FPREC|FPREC0; 620 621 prec = 0; 622 p++; 623 if (*p == '*') { 624 GETASTER(prec); 625 if (prec < 0) { /* ignore negative precision */ 626 flags &= ~FPREC; 627 } 628 p++; 629 goto retry; 630 } 631 632 GETNUM(prec, precision); 633 goto retry; 634 635 case '\n': 636 case '\0': 637 p--; 638 case '%': 639 if (flags != FNONE) { 640 rb_raise(rb_eArgError, "invalid format character - %%"); 641 } 642 PUSH("%", 1); 643 break; 644 645 case 'c': 646 { 647 VALUE val = GETARG(); 648 VALUE tmp; 649 unsigned int c; 650 int n; 651 652 tmp = rb_check_string_type(val); 653 if (!NIL_P(tmp)) { 654 if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { 655 rb_raise(rb_eArgError, "%%c requires a character"); 656 } 657 c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); 658 RB_GC_GUARD(tmp); 659 } 660 else { 661 c = NUM2INT(val); 662 n = rb_enc_codelen(c, enc); 663 } 664 if (n <= 0) { 665 rb_raise(rb_eArgError, "invalid character"); 666 } 667 if (!(flags & FWIDTH)) { 668 CHECK(n); 669 rb_enc_mbcput(c, &buf[blen], enc); 670 blen += n; 671 } 672 else if ((flags & FMINUS)) { 673 CHECK(n); 674 rb_enc_mbcput(c, &buf[blen], enc); 675 blen += n; 676 FILL(' ', width-1); 677 } 678 else { 679 FILL(' ', width-1); 680 CHECK(n); 681 rb_enc_mbcput(c, &buf[blen], enc); 682 blen += n; 683 } 684 } 685 break; 686 687 case 's': 688 case 'p': 689 format_s: 690 { 691 VALUE arg = GETARG(); 692 long len, slen; 693 694 if (*p == 'p') arg = rb_inspect(arg); 695 str = rb_obj_as_string(arg); 696 if (OBJ_TAINTED(str)) tainted = 1; 697 len = RSTRING_LEN(str); 698 rb_str_set_len(result, blen); 699 if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { 700 int cr = coderange; 701 scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); 702 ENC_CODERANGE_SET(result, 703 (cr == ENC_CODERANGE_UNKNOWN ? 704 ENC_CODERANGE_BROKEN : (coderange = cr))); 705 } 706 enc = rb_enc_check(result, str); 707 if (flags&(FPREC|FWIDTH)) { 708 slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); 709 if (slen < 0) { 710 rb_raise(rb_eArgError, "invalid mbstring sequence"); 711 } 712 if ((flags&FPREC) && (prec < slen)) { 713 char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str), 714 prec, enc); 715 slen = prec; 716 len = p - RSTRING_PTR(str); 717 } 718 /* need to adjust multi-byte string pos */ 719 if ((flags&FWIDTH) && (width > slen)) { 720 width -= (int)slen; 721 if (!(flags&FMINUS)) { 722 CHECK(width); 723 while (width--) { 724 buf[blen++] = ' '; 725 } 726 } 727 CHECK(len); 728 memcpy(&buf[blen], RSTRING_PTR(str), len); 729 RB_GC_GUARD(str); 730 blen += len; 731 if (flags&FMINUS) { 732 CHECK(width); 733 while (width--) { 734 buf[blen++] = ' '; 735 } 736 } 737 rb_enc_associate(result, enc); 738 break; 739 } 740 } 741 PUSH(RSTRING_PTR(str), len); 742 RB_GC_GUARD(str); 743 rb_enc_associate(result, enc); 744 } 745 break; 746 747 case 'd': 748 case 'i': 749 case 'o': 750 case 'x': 751 case 'X': 752 case 'b': 753 case 'B': 754 case 'u': 755 { 756 volatile VALUE val = GETARG(); 757 char fbuf[32], nbuf[64], *s; 758 const char *prefix = 0; 759 int sign = 0, dots = 0; 760 char sc = 0; 761 long v = 0; 762 int base, bignum = 0; 763 int len; 764 765 switch (*p) { 766 case 'd': 767 case 'i': 768 case 'u': 769 sign = 1; break; 770 case 'o': 771 case 'x': 772 case 'X': 773 case 'b': 774 case 'B': 775 if (flags&(FPLUS|FSPACE)) sign = 1; 776 break; 777 } 778 if (flags & FSHARP) { 779 switch (*p) { 780 case 'o': 781 prefix = "0"; break; 782 case 'x': 783 prefix = "0x"; break; 784 case 'X': 785 prefix = "0X"; break; 786 case 'b': 787 prefix = "0b"; break; 788 case 'B': 789 prefix = "0B"; break; 790 } 791 } 792 793 bin_retry: 794 switch (TYPE(val)) { 795 case T_FLOAT: 796 if (FIXABLE(RFLOAT_VALUE(val))) { 797 val = LONG2FIX((long)RFLOAT_VALUE(val)); 798 goto bin_retry; 799 } 800 val = rb_dbl2big(RFLOAT_VALUE(val)); 801 if (FIXNUM_P(val)) goto bin_retry; 802 bignum = 1; 803 break; 804 case T_STRING: 805 val = rb_str_to_inum(val, 0, TRUE); 806 goto bin_retry; 807 case T_BIGNUM: 808 bignum = 1; 809 break; 810 case T_FIXNUM: 811 v = FIX2LONG(val); 812 break; 813 default: 814 val = rb_Integer(val); 815 goto bin_retry; 816 } 817 818 switch (*p) { 819 case 'o': 820 base = 8; break; 821 case 'x': 822 case 'X': 823 base = 16; break; 824 case 'b': 825 case 'B': 826 base = 2; break; 827 case 'u': 828 case 'd': 829 case 'i': 830 default: 831 base = 10; break; 832 } 833 834 if (!bignum) { 835 if (base == 2) { 836 val = rb_int2big(v); 837 goto bin_retry; 838 } 839 if (sign) { 840 char c = *p; 841 if (c == 'i') c = 'd'; /* %d and %i are identical */ 842 if (v < 0) { 843 v = -v; 844 sc = '-'; 845 width--; 846 } 847 else if (flags & FPLUS) { 848 sc = '+'; 849 width--; 850 } 851 else if (flags & FSPACE) { 852 sc = ' '; 853 width--; 854 } 855 snprintf(fbuf, sizeof(fbuf), "%%l%c", c); 856 snprintf(nbuf, sizeof(nbuf), fbuf, v); 857 s = nbuf; 858 } 859 else { 860 s = nbuf; 861 if (v < 0) { 862 dots = 1; 863 } 864 snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p); 865 snprintf(++s, sizeof(nbuf) - 1, fbuf, v); 866 if (v < 0) { 867 char d = 0; 868 869 s = remove_sign_bits(s, base); 870 switch (base) { 871 case 16: 872 d = 'f'; break; 873 case 8: 874 d = '7'; break; 875 } 876 if (d && *s != d) { 877 *--s = d; 878 } 879 } 880 } 881 len = (int)strlen(s); 882 } 883 else { 884 if (sign) { 885 tmp = rb_big2str(val, base); 886 s = RSTRING_PTR(tmp); 887 if (s[0] == '-') { 888 s++; 889 sc = '-'; 890 width--; 891 } 892 else if (flags & FPLUS) { 893 sc = '+'; 894 width--; 895 } 896 else if (flags & FSPACE) { 897 sc = ' '; 898 width--; 899 } 900 } 901 else { 902 if (!RBIGNUM_SIGN(val)) { 903 val = rb_big_clone(val); 904 rb_big_2comp(val); 905 } 906 tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val)); 907 s = RSTRING_PTR(tmp); 908 if (*s == '-') { 909 dots = 1; 910 if (base == 10) { 911 rb_warning("negative number for %%u specifier"); 912 } 913 s = remove_sign_bits(++s, base); 914 switch (base) { 915 case 16: 916 if (s[0] != 'f') *--s = 'f'; break; 917 case 8: 918 if (s[0] != '7') *--s = '7'; break; 919 case 2: 920 if (s[0] != '1') *--s = '1'; break; 921 } 922 } 923 } 924 len = rb_long2int(RSTRING_END(tmp) - s); 925 } 926 927 if (dots) { 928 prec -= 2; 929 width -= 2; 930 } 931 932 if (*p == 'X') { 933 char *pp = s; 934 int c; 935 while ((c = (int)(unsigned char)*pp) != 0) { 936 *pp = rb_enc_toupper(c, enc); 937 pp++; 938 } 939 } 940 if (prefix && !prefix[1]) { /* octal */ 941 if (dots) { 942 prefix = 0; 943 } 944 else if (len == 1 && *s == '0') { 945 len = 0; 946 if (flags & FPREC) prec--; 947 } 948 else if ((flags & FPREC) && (prec > len)) { 949 prefix = 0; 950 } 951 } 952 else if (len == 1 && *s == '0') { 953 prefix = 0; 954 } 955 if (prefix) { 956 width -= (int)strlen(prefix); 957 } 958 if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { 959 prec = width; 960 width = 0; 961 } 962 else { 963 if (prec < len) { 964 if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; 965 prec = len; 966 } 967 width -= prec; 968 } 969 if (!(flags&FMINUS)) { 970 CHECK(width); 971 while (width-- > 0) { 972 buf[blen++] = ' '; 973 } 974 } 975 if (sc) PUSH(&sc, 1); 976 if (prefix) { 977 int plen = (int)strlen(prefix); 978 PUSH(prefix, plen); 979 } 980 CHECK(prec - len); 981 if (dots) PUSH("..", 2); 982 if (!bignum && v < 0) { 983 char c = sign_bits(base, p); 984 while (len < prec--) { 985 buf[blen++] = c; 986 } 987 } 988 else if ((flags & (FMINUS|FPREC)) != FMINUS) { 989 char c; 990 991 if (!sign && bignum && !RBIGNUM_SIGN(val)) 992 c = sign_bits(base, p); 993 else 994 c = '0'; 995 while (len < prec--) { 996 buf[blen++] = c; 997 } 998 } 999 PUSH(s, len); 1000 RB_GC_GUARD(tmp); 1001 CHECK(width); 1002 while (width-- > 0) { 1003 buf[blen++] = ' '; 1004 } 1005 } 1006 break; 1007 1008 case 'f': 1009 case 'g': 1010 case 'G': 1011 case 'e': 1012 case 'E': 1013 case 'a': 1014 case 'A': 1015 { 1016 VALUE val = GETARG(); 1017 double fval; 1018 int i, need = 6; 1019 char fbuf[32]; 1020 1021 fval = RFLOAT_VALUE(rb_Float(val)); 1022 if (isnan(fval) || isinf(fval)) { 1023 const char *expr; 1024 1025 if (isnan(fval)) { 1026 expr = "NaN"; 1027 } 1028 else { 1029 expr = "Inf"; 1030 } 1031 need = (int)strlen(expr); 1032 if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) 1033 need++; 1034 if ((flags & FWIDTH) && need < width) 1035 need = width; 1036 1037 CHECK(need + 1); 1038 snprintf(&buf[blen], need + 1, "%*s", need, ""); 1039 if (flags & FMINUS) { 1040 if (!isnan(fval) && fval < 0.0) 1041 buf[blen++] = '-'; 1042 else if (flags & FPLUS) 1043 buf[blen++] = '+'; 1044 else if (flags & FSPACE) 1045 blen++; 1046 memcpy(&buf[blen], expr, strlen(expr)); 1047 } 1048 else { 1049 if (!isnan(fval) && fval < 0.0) 1050 buf[blen + need - strlen(expr) - 1] = '-'; 1051 else if (flags & FPLUS) 1052 buf[blen + need - strlen(expr) - 1] = '+'; 1053 else if ((flags & FSPACE) && need > width) 1054 blen++; 1055 memcpy(&buf[blen + need - strlen(expr)], expr, 1056 strlen(expr)); 1057 } 1058 blen += strlen(&buf[blen]); 1059 break; 1060 } 1061 1062 fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); 1063 need = 0; 1064 if (*p != 'e' && *p != 'E') { 1065 i = INT_MIN; 1066 frexp(fval, &i); 1067 if (i > 0) 1068 need = BIT_DIGITS(i); 1069 } 1070 need += (flags&FPREC) ? prec : 6; 1071 if ((flags&FWIDTH) && need < width) 1072 need = width; 1073 need += 20; 1074 1075 CHECK(need); 1076 snprintf(&buf[blen], need, fbuf, fval); 1077 blen += strlen(&buf[blen]); 1078 } 1079 break; 1080 } 1081 flags = FNONE; 1082 } 1083 1084 sprint_exit: 1085 RB_GC_GUARD(fmt); 1086 /* XXX - We cannot validate the number of arguments if (digit)$ style used. 1087 */ 1088 if (posarg >= 0 && nextarg < argc) { 1089 const char *mesg = "too many arguments for format string"; 1090 if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg); 1091 if (RTEST(ruby_verbose)) rb_warn("%s", mesg); 1092 } 1093 rb_str_resize(result, blen); 1094 1095 if (tainted) OBJ_TAINT(result); 1096 return result; 1097} 1098 1099static void 1100fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec) 1101{ 1102 char *end = buf + size; 1103 *buf++ = '%'; 1104 if (flags & FSHARP) *buf++ = '#'; 1105 if (flags & FPLUS) *buf++ = '+'; 1106 if (flags & FMINUS) *buf++ = '-'; 1107 if (flags & FZERO) *buf++ = '0'; 1108 if (flags & FSPACE) *buf++ = ' '; 1109 1110 if (flags & FWIDTH) { 1111 snprintf(buf, end - buf, "%d", width); 1112 buf += strlen(buf); 1113 } 1114 1115 if (flags & FPREC) { 1116 snprintf(buf, end - buf, ".%d", prec); 1117 buf += strlen(buf); 1118 } 1119 1120 *buf++ = c; 1121 *buf = '\0'; 1122} 1123 1124#undef FILE 1125#define FILE rb_printf_buffer 1126#define __sbuf rb_printf_sbuf 1127#define __sFILE rb_printf_sfile 1128#undef feof 1129#undef ferror 1130#undef clearerr 1131#undef fileno 1132#if SIZEOF_LONG < SIZEOF_VOIDP 1133# if SIZEOF_LONG_LONG == SIZEOF_VOIDP 1134# define _HAVE_SANE_QUAD_ 1135# define _HAVE_LLP64_ 1136# define quad_t LONG_LONG 1137# define u_quad_t unsigned LONG_LONG 1138# endif 1139#elif SIZEOF_LONG != SIZEOF_LONG_LONG && SIZEOF_LONG_LONG == 8 1140# define _HAVE_SANE_QUAD_ 1141# define quad_t LONG_LONG 1142# define u_quad_t unsigned LONG_LONG 1143#endif 1144#define FLOATING_POINT 1 1145#define BSD__dtoa ruby_dtoa 1146#define BSD__hdtoa ruby_hdtoa 1147#include "vsnprintf.c" 1148 1149typedef struct { 1150 rb_printf_buffer base; 1151 volatile VALUE value; 1152} rb_printf_buffer_extra; 1153 1154static int 1155ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio) 1156{ 1157 struct __siov *iov; 1158 VALUE result = (VALUE)fp->_bf._base; 1159 char *buf = (char*)fp->_p; 1160 size_t len, n; 1161 size_t blen = buf - RSTRING_PTR(result), bsiz = fp->_w; 1162 1163 if (RBASIC(result)->klass) { 1164 rb_raise(rb_eRuntimeError, "rb_vsprintf reentered"); 1165 } 1166 if ((len = uio->uio_resid) == 0) 1167 return 0; 1168 CHECK(len); 1169 buf += blen; 1170 fp->_w = bsiz; 1171 for (iov = uio->uio_iov; len > 0; ++iov) { 1172 MEMCPY(buf, iov->iov_base, char, n = iov->iov_len); 1173 buf += n; 1174 len -= n; 1175 } 1176 fp->_p = (unsigned char *)buf; 1177 rb_str_set_len(result, buf - RSTRING_PTR(result)); 1178 return 0; 1179} 1180 1181static char * 1182ruby__sfvextra(rb_printf_buffer *fp, size_t valsize, void *valp, long *sz, int sign) 1183{ 1184 VALUE value, result = (VALUE)fp->_bf._base; 1185 rb_encoding *enc; 1186 char *cp; 1187 1188 if (valsize != sizeof(VALUE)) return 0; 1189 value = *(VALUE *)valp; 1190 if (RBASIC(result)->klass) { 1191 rb_raise(rb_eRuntimeError, "rb_vsprintf reentered"); 1192 } 1193 if (sign == '+') { 1194 value = rb_inspect(value); 1195 } 1196 else { 1197 value = rb_obj_as_string(value); 1198 } 1199 enc = rb_enc_compatible(result, value); 1200 if (enc) { 1201 rb_enc_associate(result, enc); 1202 } 1203 else { 1204 enc = rb_enc_get(result); 1205 value = rb_str_conv_enc_opts(value, rb_enc_get(value), enc, 1206 ECONV_UNDEF_REPLACE|ECONV_INVALID_REPLACE, 1207 Qnil); 1208 *(volatile VALUE *)valp = value; 1209 } 1210 StringValueCStr(value); 1211 RSTRING_GETMEM(value, cp, *sz); 1212 ((rb_printf_buffer_extra *)fp)->value = value; 1213 OBJ_INFECT(result, value); 1214 return cp; 1215} 1216 1217VALUE 1218rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) 1219{ 1220 rb_printf_buffer_extra buffer; 1221#define f buffer.base 1222 VALUE result; 1223 1224 f._flags = __SWR | __SSTR; 1225 f._bf._size = 0; 1226 f._w = 120; 1227 result = rb_str_buf_new(f._w); 1228 if (enc) { 1229 if (rb_enc_mbminlen(enc) > 1) { 1230 /* the implementation deeply depends on plain char */ 1231 rb_raise(rb_eArgError, "cannot construct wchar_t based encoding string: %s", 1232 rb_enc_name(enc)); 1233 } 1234 rb_enc_associate(result, enc); 1235 } 1236 f._bf._base = (unsigned char *)result; 1237 f._p = (unsigned char *)RSTRING_PTR(result); 1238 RBASIC(result)->klass = 0; 1239 f.vwrite = ruby__sfvwrite; 1240 f.vextra = ruby__sfvextra; 1241 buffer.value = 0; 1242 BSD_vfprintf(&f, fmt, ap); 1243 RBASIC(result)->klass = rb_cString; 1244 rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); 1245#undef f 1246 1247 return result; 1248} 1249 1250VALUE 1251rb_enc_sprintf(rb_encoding *enc, const char *format, ...) 1252{ 1253 VALUE result; 1254 va_list ap; 1255 1256 va_start(ap, format); 1257 result = rb_enc_vsprintf(enc, format, ap); 1258 va_end(ap); 1259 1260 return result; 1261} 1262 1263VALUE 1264rb_vsprintf(const char *fmt, va_list ap) 1265{ 1266 return rb_enc_vsprintf(NULL, fmt, ap); 1267} 1268 1269VALUE 1270rb_sprintf(const char *format, ...) 1271{ 1272 VALUE result; 1273 va_list ap; 1274 1275 va_start(ap, format); 1276 result = rb_vsprintf(format, ap); 1277 va_end(ap); 1278 1279 return result; 1280} 1281 1282VALUE 1283rb_str_vcatf(VALUE str, const char *fmt, va_list ap) 1284{ 1285 rb_printf_buffer_extra buffer; 1286#define f buffer.base 1287 VALUE klass; 1288 1289 StringValue(str); 1290 rb_str_modify(str); 1291 f._flags = __SWR | __SSTR; 1292 f._bf._size = 0; 1293 f._w = rb_str_capacity(str); 1294 f._bf._base = (unsigned char *)str; 1295 f._p = (unsigned char *)RSTRING_END(str); 1296 klass = RBASIC(str)->klass; 1297 RBASIC(str)->klass = 0; 1298 f.vwrite = ruby__sfvwrite; 1299 f.vextra = ruby__sfvextra; 1300 buffer.value = 0; 1301 BSD_vfprintf(&f, fmt, ap); 1302 RBASIC(str)->klass = klass; 1303 rb_str_resize(str, (char *)f._p - RSTRING_PTR(str)); 1304#undef f 1305 1306 return str; 1307} 1308 1309VALUE 1310rb_str_catf(VALUE str, const char *format, ...) 1311{ 1312 va_list ap; 1313 1314 va_start(ap, format); 1315 str = rb_str_vcatf(str, format, ap); 1316 va_end(ap); 1317 1318 return str; 1319} 1320