1/* $NetBSD: printf.c,v 1.54 2021/05/20 02:01:07 christos Exp $ */ 2 3/* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33#ifndef lint 34#if !defined(BUILTIN) && !defined(SHELL) 35__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 36 The Regents of the University of California. All rights reserved."); 37#endif 38#endif 39 40#ifndef lint 41#if 0 42static char sccsid[] = "@(#)printf.c 8.2 (Berkeley) 3/22/95"; 43#else 44__RCSID("$NetBSD: printf.c,v 1.54 2021/05/20 02:01:07 christos Exp $"); 45#endif 46#endif /* not lint */ 47 48#include <sys/types.h> 49 50#include <ctype.h> 51#include <err.h> 52#include <errno.h> 53#include <inttypes.h> 54#include <limits.h> 55#include <locale.h> 56#include <stdarg.h> 57#include <stdio.h> 58#include <stdlib.h> 59#include <string.h> 60#include <unistd.h> 61 62#ifdef __GNUC__ 63#define ESCAPE '\e' 64#else 65#define ESCAPE 033 66#endif 67 68static void conv_escape_str(char *, void (*)(int), int); 69static char *conv_escape(char *, char *, int); 70static char *conv_expand(const char *); 71static char getchr(void); 72static double getdouble(void); 73static int getwidth(void); 74static intmax_t getintmax(void); 75static char *getstr(void); 76static char *mklong(const char *, char); 77static intmax_t wide_char(const char *); 78static void check_conversion(const char *, const char *); 79static void usage(void); 80 81static void b_count(int); 82static void b_output(int); 83static size_t b_length; 84static char *b_fmt; 85 86static int rval; 87static char **gargv; 88 89#ifdef BUILTIN /* csh builtin */ 90#define main progprintf 91#endif 92 93#ifdef SHELL /* sh (aka ash) builtin */ 94#define main printfcmd 95#include "../../bin/sh/bltin/bltin.h" 96#endif /* SHELL */ 97 98#define PF(f, func) { \ 99 if (fieldwidth != -1) { \ 100 if (precision != -1) \ 101 error = printf(f, fieldwidth, precision, func); \ 102 else \ 103 error = printf(f, fieldwidth, func); \ 104 } else if (precision != -1) \ 105 error = printf(f, precision, func); \ 106 else \ 107 error = printf(f, func); \ 108} 109 110#define APF(cpp, f, func) { \ 111 if (fieldwidth != -1) { \ 112 if (precision != -1) \ 113 error = asprintf(cpp, f, fieldwidth, precision, func); \ 114 else \ 115 error = asprintf(cpp, f, fieldwidth, func); \ 116 } else if (precision != -1) \ 117 error = asprintf(cpp, f, precision, func); \ 118 else \ 119 error = asprintf(cpp, f, func); \ 120} 121 122#define isodigit(c) ((c) >= '0' && (c) <= '7') 123#define octtobin(c) ((c) - '0') 124#define check(c, a) (c) >= (a) && (c) <= (a) + 5 ? (c) - (a) + 10 125#define hextobin(c) (check(c, 'a') : check(c, 'A') : (c) - '0') 126#ifdef main 127int main(int, char *[]); 128#endif 129 130int 131main(int argc, char *argv[]) 132{ 133 char *fmt, *start; 134 int fieldwidth, precision; 135 char nextch; 136 char *format; 137 char ch; 138 int error; 139 140#if !defined(SHELL) && !defined(BUILTIN) 141 (void)setlocale (LC_ALL, ""); 142#endif 143 144 rval = 0; /* clear for builtin versions (avoid holdover) */ 145 clearerr(stdout); /* for the builtin version */ 146 147 /* 148 * printf does not comply with Posix XBD 12.2 - there are no opts, 149 * not even the -- end of options marker. Do not run getoot(). 150 */ 151 if (argc > 2 && strchr(argv[1], '%') == NULL) { 152 int o; 153 154 /* 155 * except that if there are multiple args and 156 * the first (the nominal format) contains no '%' 157 * conversions (which we will approximate as no '%' 158 * characters at all, conversions or not) then the 159 * results are unspecified, and we can do what we 160 * like. So in that case, for some backward compat 161 * to scripts which (stupidly) do: 162 * printf -- format args 163 * process this case the old way. 164 */ 165 166 while ((o = getopt(argc, argv, "")) != -1) { 167 switch (o) { 168 case '?': 169 default: 170 usage(); 171 return 1; 172 } 173 } 174 argc -= optind; 175 argv += optind; 176 } else { 177 argc -= 1; /* drop argv[0] (the program name) */ 178 argv += 1; 179 } 180 181 if (argc < 1) { 182 usage(); 183 return 1; 184 } 185 186 format = *argv; 187 gargv = ++argv; 188 189#define SKIP1 "#-+ 0'" 190#define SKIP2 "0123456789" 191 do { 192 /* 193 * Basic algorithm is to scan the format string for conversion 194 * specifications -- once one is found, find out if the field 195 * width or precision is a '*'; if it is, gather up value. 196 * Note, format strings are reused as necessary to use up the 197 * provided arguments, arguments of zero/null string are 198 * provided to use up the format string. 199 */ 200 201 /* find next format specification */ 202 for (fmt = format; (ch = *fmt++) != '\0';) { 203 if (ch == '\\') { 204 char c_ch; 205 fmt = conv_escape(fmt, &c_ch, 0); 206 putchar(c_ch); 207 continue; 208 } 209 if (ch != '%' || (*fmt == '%' && ++fmt)) { 210 (void)putchar(ch); 211 continue; 212 } 213 214 /* 215 * Ok - we've found a format specification, 216 * Save its address for a later printf(). 217 */ 218 start = fmt - 1; 219 220 /* skip to field width */ 221 fmt += strspn(fmt, SKIP1); 222 if (*fmt == '*') { 223 fmt++; 224 fieldwidth = getwidth(); 225 } else { 226 fieldwidth = -1; 227 228 /* skip to possible '.' for precision */ 229 fmt += strspn(fmt, SKIP2); 230 } 231 232 if (*fmt == '.') { 233 /* get following precision */ 234 fmt++; 235 if (*fmt == '*') { 236 fmt++; 237 precision = getwidth(); 238 } else { 239 precision = -1; 240 fmt += strspn(fmt, SKIP2); 241 } 242 } else 243 precision = -1; 244 245 ch = *fmt; 246 if (!ch) { 247 warnx("%s: missing format character", start); 248 return 1; 249 } 250 251 /* 252 * null terminate format string to we can use it 253 * as an argument to printf. 254 */ 255 nextch = fmt[1]; 256 fmt[1] = 0; 257 258 switch (ch) { 259 260 case 'B': { 261 const char *p = conv_expand(getstr()); 262 263 if (p == NULL) 264 goto out; 265 *fmt = 's'; 266 PF(start, p); 267 if (error < 0) 268 goto out; 269 break; 270 } 271 case 'b': { 272 /* 273 * There has to be a better way to do this, 274 * but the string we generate might have 275 * embedded nulls 276 */ 277 static char *a, *t; 278 char *cp = getstr(); 279 280 /* Free on entry in case shell longjumped out */ 281 if (a != NULL) 282 free(a); 283 a = NULL; 284 if (t != NULL) 285 free(t); 286 t = NULL; 287 288 /* Count number of bytes we want to output */ 289 b_length = 0; 290 conv_escape_str(cp, b_count, 0); 291 t = malloc(b_length + 1); 292 if (t == NULL) 293 goto out; 294 (void)memset(t, 'x', b_length); 295 t[b_length] = 0; 296 297 /* Get printf to calculate the lengths */ 298 *fmt = 's'; 299 APF(&a, start, t); 300 if (error == -1) 301 goto out; 302 b_fmt = a; 303 304 /* Output leading spaces and data bytes */ 305 conv_escape_str(cp, b_output, 1); 306 307 /* Add any trailing spaces */ 308 printf("%s", b_fmt); 309 break; 310 } 311 case 'c': { 312 char p = getchr(); 313 314 PF(start, p); 315 if (error < 0) 316 goto out; 317 break; 318 } 319 case 's': { 320 char *p = getstr(); 321 322 PF(start, p); 323 if (error < 0) 324 goto out; 325 break; 326 } 327 case 'd': 328 case 'i': { 329 intmax_t p = getintmax(); 330 char *f = mklong(start, ch); 331 332 PF(f, p); 333 if (error < 0) 334 goto out; 335 break; 336 } 337 case 'o': 338 case 'u': 339 case 'x': 340 case 'X': { 341 uintmax_t p = (uintmax_t)getintmax(); 342 char *f = mklong(start, ch); 343 344 PF(f, p); 345 if (error < 0) 346 goto out; 347 break; 348 } 349 case 'a': 350 case 'A': 351 case 'e': 352 case 'E': 353 case 'f': 354 case 'F': 355 case 'g': 356 case 'G': { 357 double p = getdouble(); 358 359 PF(start, p); 360 if (error < 0) 361 goto out; 362 break; 363 } 364 case '%': 365 /* Don't ask, but this is useful ... */ 366 if (fieldwidth == 'N' && precision == 'B') 367 return 0; 368 /* FALLTHROUGH */ 369 default: 370 warnx("%s: invalid directive", start); 371 return 1; 372 } 373 *fmt++ = ch; 374 *fmt = nextch; 375 /* escape if a \c was encountered */ 376 if (rval & 0x100) 377 goto done; 378 } 379 } while (gargv != argv && *gargv); 380 381 done: 382 (void)fflush(stdout); 383 if (ferror(stdout)) { 384 clearerr(stdout); 385 err(1, "write error"); 386 } 387 return rval & ~0x100; 388 out: 389 warn("print failed"); 390 return 1; 391} 392 393/* helper functions for conv_escape_str */ 394 395static void 396/*ARGSUSED*/ 397b_count(int ch) 398{ 399 b_length++; 400} 401 402/* Output one converted character for every 'x' in the 'format' */ 403 404static void 405b_output(int ch) 406{ 407 for (;;) { 408 switch (*b_fmt++) { 409 case 0: 410 b_fmt--; 411 return; 412 case ' ': 413 putchar(' '); 414 break; 415 default: 416 putchar(ch); 417 return; 418 } 419 } 420} 421 422 423/* 424 * Print SysV echo(1) style escape string 425 * Halts processing string if a \c escape is encountered. 426 */ 427static void 428conv_escape_str(char *str, void (*do_putchar)(int), int quiet) 429{ 430 int value; 431 int ch; 432 char c; 433 434 while ((ch = *str++) != '\0') { 435 if (ch != '\\') { 436 do_putchar(ch); 437 continue; 438 } 439 440 ch = *str++; 441 if (ch == 'c') { 442 /* \c as in SYSV echo - abort all processing.... */ 443 rval |= 0x100; 444 break; 445 } 446 447 /* 448 * %b string octal constants are not like those in C. 449 * They start with a \0, and are followed by 0, 1, 2, 450 * or 3 octal digits. 451 */ 452 if (ch == '0') { 453 int octnum = 0, i; 454 for (i = 0; i < 3; i++) { 455 if (!isdigit((unsigned char)*str) || *str > '7') 456 break; 457 octnum = (octnum << 3) | (*str++ - '0'); 458 } 459 do_putchar(octnum); 460 continue; 461 } 462 463 /* \[M][^|-]C as defined by vis(3) */ 464 if (ch == 'M' && *str == '-') { 465 do_putchar(0200 | str[1]); 466 str += 2; 467 continue; 468 } 469 if (ch == 'M' && *str == '^') { 470 str++; 471 value = 0200; 472 ch = '^'; 473 } else 474 value = 0; 475 if (ch == '^') { 476 ch = *str++; 477 if (ch == '?') 478 value |= 0177; 479 else 480 value |= ch & 037; 481 do_putchar(value); 482 continue; 483 } 484 485 /* Finally test for sequences valid in the format string */ 486 str = conv_escape(str - 1, &c, quiet); 487 do_putchar(c); 488 } 489} 490 491/* 492 * Print "standard" escape characters 493 */ 494static char * 495conv_escape(char *str, char *conv_ch, int quiet) 496{ 497 int value = 0; 498 char ch, *begin; 499 int c; 500 501 ch = *str++; 502 503 switch (ch) { 504 case '\0': 505 if (!quiet) 506 warnx("incomplete escape sequence"); 507 rval = 1; 508 value = '\\'; 509 --str; 510 break; 511 512 case '0': case '1': case '2': case '3': 513 case '4': case '5': case '6': case '7': 514 str--; 515 for (c = 3; c-- && isodigit(*str); str++) { 516 value <<= 3; 517 value += octtobin(*str); 518 } 519 break; 520 521 case 'x': 522 /* 523 * Hexadecimal character constants are not required to be 524 * supported (by SuS v1) because there is no consistent 525 * way to detect the end of the constant. 526 * Supporting 2 byte constants is a compromise. 527 */ 528 begin = str; 529 for (c = 2; c-- && isxdigit((unsigned char)*str); str++) { 530 value <<= 4; 531 value += hextobin(*str); 532 } 533 if (str == begin) { 534 if (!quiet) 535 warnx("\\x%s: missing hexadecimal number " 536 "in escape", begin); 537 rval = 1; 538 } 539 break; 540 541 case '\\': value = '\\'; break; /* backslash */ 542 case '\'': value = '\''; break; /* single quote */ 543 case '"': value = '"'; break; /* double quote */ 544 case 'a': value = '\a'; break; /* alert */ 545 case 'b': value = '\b'; break; /* backspace */ 546 case 'e': value = ESCAPE; break; /* escape */ 547 case 'E': value = ESCAPE; break; /* escape */ 548 case 'f': value = '\f'; break; /* form-feed */ 549 case 'n': value = '\n'; break; /* newline */ 550 case 'r': value = '\r'; break; /* carriage-return */ 551 case 't': value = '\t'; break; /* tab */ 552 case 'v': value = '\v'; break; /* vertical-tab */ 553 554 default: 555 if (!quiet) 556 warnx("unknown escape sequence `\\%c'", ch); 557 rval = 1; 558 value = ch; 559 break; 560 } 561 562 *conv_ch = (char)value; 563 return str; 564} 565 566/* expand a string so that everything is printable */ 567 568static char * 569conv_expand(const char *str) 570{ 571 static char *conv_str; 572 char *cp; 573 char ch; 574 575 if (conv_str) 576 free(conv_str); 577 /* get a buffer that is definitely large enough.... */ 578 conv_str = malloc(4 * strlen(str) + 1); 579 if (!conv_str) 580 return NULL; 581 cp = conv_str; 582 583 while ((ch = *(const char *)str++) != '\0') { 584 switch (ch) { 585 /* Use C escapes for expected control characters */ 586 case '\\': ch = '\\'; break; /* backslash */ 587 case '\'': ch = '\''; break; /* single quote */ 588 case '"': ch = '"'; break; /* double quote */ 589 case '\a': ch = 'a'; break; /* alert */ 590 case '\b': ch = 'b'; break; /* backspace */ 591 case ESCAPE: ch = 'e'; break; /* escape */ 592 case '\f': ch = 'f'; break; /* form-feed */ 593 case '\n': ch = 'n'; break; /* newline */ 594 case '\r': ch = 'r'; break; /* carriage-return */ 595 case '\t': ch = 't'; break; /* tab */ 596 case '\v': ch = 'v'; break; /* vertical-tab */ 597 default: 598 /* Copy anything printable */ 599 if (isprint((unsigned char)ch)) { 600 *cp++ = ch; 601 continue; 602 } 603 /* Use vis(3) encodings for the rest */ 604 *cp++ = '\\'; 605 if (ch & 0200) { 606 *cp++ = 'M'; 607 ch &= (char)~0200; 608 } 609 if (ch == 0177) { 610 *cp++ = '^'; 611 *cp++ = '?'; 612 continue; 613 } 614 if (ch < 040) { 615 *cp++ = '^'; 616 *cp++ = ch | 0100; 617 continue; 618 } 619 *cp++ = '-'; 620 *cp++ = ch; 621 continue; 622 } 623 *cp++ = '\\'; 624 *cp++ = ch; 625 } 626 627 *cp = 0; 628 return conv_str; 629} 630 631static char * 632mklong(const char *str, char ch) 633{ 634 static char copy[64]; 635 size_t len; 636 637 len = strlen(str) + 2; 638 if (len > sizeof copy) { 639 warnx("format \"%s\" too complex", str); 640 len = 4; 641 rval = 1; 642 } 643 (void)memmove(copy, str, len - 3); 644 copy[len - 3] = 'j'; 645 copy[len - 2] = ch; 646 copy[len - 1] = '\0'; 647 return copy; 648} 649 650static char 651getchr(void) 652{ 653 if (!*gargv) 654 return 0; 655 return **gargv++; 656} 657 658static char * 659getstr(void) 660{ 661 static char empty[] = ""; 662 if (!*gargv) 663 return empty; 664 return *gargv++; 665} 666 667static int 668getwidth(void) 669{ 670 unsigned long val; 671 char *s, *ep; 672 673 s = *gargv; 674 if (s == NULL) 675 return 0; 676 gargv++; 677 678 errno = 0; 679 val = strtoul(s, &ep, 0); 680 check_conversion(s, ep); 681 682 /* Arbitrarily 'restrict' field widths to 1Mbyte */ 683 if (val > 1 << 20) { 684 warnx("%s: invalid field width", s); 685 return 0; 686 } 687 688 return (int)val; 689} 690 691static intmax_t 692getintmax(void) 693{ 694 intmax_t val; 695 char *cp, *ep; 696 697 cp = *gargv; 698 if (cp == NULL) 699 return 0; 700 gargv++; 701 702 if (*cp == '\"' || *cp == '\'') 703 return wide_char(cp); 704 705 errno = 0; 706 val = strtoimax(cp, &ep, 0); 707 check_conversion(cp, ep); 708 return val; 709} 710 711static double 712getdouble(void) 713{ 714 double val; 715 char *ep; 716 717 if (!*gargv) 718 return 0.0; 719 720 /* This is a NetBSD extension, not required by POSIX (it is useless) */ 721 if (*(ep = *gargv) == '\"' || *ep == '\'') 722 return (double)wide_char(ep); 723 724 errno = 0; 725 val = strtod(*gargv, &ep); 726 check_conversion(*gargv++, ep); 727 return val; 728} 729 730/* 731 * XXX This is just a placeholder for a later version which 732 * will do mbtowc() on p+1 (and after checking that all of the 733 * string has been consumed) return that value. 734 * 735 * This (mbtowc) behaviour is required by POSIX (as is the check 736 * that the whole arg is consumed). 737 * 738 * What follows is actually correct if we assume that LC_CTYPE=C 739 * (or something else similar that is a single byte charset). 740 */ 741static intmax_t 742wide_char(const char *p) 743{ 744 intmax_t ch = (intmax_t)(unsigned char)p[1]; 745 746 if (ch != 0 && p[2] != '\0') { 747 warnx("%s: not completely converted", p); 748 rval = 1; 749 } 750 751 return ch; 752} 753 754static void 755check_conversion(const char *s, const char *ep) 756{ 757 if (*ep) { 758 if (ep == s) 759 warnx("%s: expected numeric value", s); 760 else 761 warnx("%s: not completely converted", s); 762 rval = 1; 763 } else if (errno == ERANGE) { 764 warnx("%s: %s", s, strerror(ERANGE)); 765 rval = 1; 766 } 767} 768 769static void 770usage(void) 771{ 772 (void)fprintf(stderr, "Usage: %s format [arg ...]\n", getprogname()); 773} 774