1/*	$NetBSD: printf.c,v 1.54 2021/05/20 02:01:07 christos Exp $	*/
2
3/*
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#ifndef lint
34#if !defined(BUILTIN) && !defined(SHELL)
35__COPYRIGHT("@(#) Copyright (c) 1989, 1993\
36 The Regents of the University of California.  All rights reserved.");
37#endif
38#endif
39
40#ifndef lint
41#if 0
42static char sccsid[] = "@(#)printf.c	8.2 (Berkeley) 3/22/95";
43#else
44__RCSID("$NetBSD: printf.c,v 1.54 2021/05/20 02:01:07 christos Exp $");
45#endif
46#endif /* not lint */
47
48#include <sys/types.h>
49
50#include <ctype.h>
51#include <err.h>
52#include <errno.h>
53#include <inttypes.h>
54#include <limits.h>
55#include <locale.h>
56#include <stdarg.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <unistd.h>
61
/*
 * ESCAPE is the value produced for the \e and \E escapes (octal 033).
 * Compilers that define __GNUC__ are given the '\e' character escape,
 * which is an extension; all others get the numeric value.
 */
#if defined(__GNUC__)
#define ESCAPE	'\e'
#else
#define ESCAPE	033
#endif
67
/* Escape-sequence decoding (conv_escape*) and re-encoding (conv_expand). */
static void	 conv_escape_str(char *str, void (*do_putchar)(int), int quiet);
static char	*conv_escape(char *str, char *conv_ch, int quiet);
static char	*conv_expand(const char *str);

/* Fetch the next operand from gargv, converted to the requested type. */
static char	 getchr(void);
static char	*getstr(void);
static int	 getwidth(void);
static intmax_t	 getintmax(void);
static double	 getdouble(void);

/* Miscellaneous helpers. */
static char	*mklong(const char *str, char ch);
static intmax_t	 wide_char(const char *p);
static void	 check_conversion(const char *s, const char *ep);
static void	 usage(void);

/* %b support: callbacks handed to conv_escape_str() and their state. */
static void	 b_count(int ch);
static void	 b_output(int ch);
static size_t	 b_length;	/* number of bytes tallied by b_count() */
static char	*b_fmt;		/* template cursor advanced by b_output() */

static int	 rval;		/* exit status; bit 0x100 is set once \c is seen */
static char    **gargv;		/* next operand that has not been consumed */

#if defined(BUILTIN)	/* csh builtin */
#define main progprintf
#endif

#if defined(SHELL)	/* sh (aka ash) builtin */
#define main printfcmd
#include "../../bin/sh/bltin/bltin.h"
#endif /* SHELL */
97
/*
 * PF(f, func): printf() the value `func' using the format `f'.
 *
 * Both macros read the `fieldwidth' and `precision' variables of the
 * enclosing function; a value other than -1 means the corresponding '*'
 * was present in the format, so that int is passed ahead of the value.
 * The return value of the call is stored in the enclosing function's
 * `error' variable.
 */
#define PF(f, func) do { \
	if (fieldwidth == -1 && precision == -1) \
		error = printf(f, func); \
	else if (fieldwidth == -1) \
		error = printf(f, precision, func); \
	else if (precision == -1) \
		error = printf(f, fieldwidth, func); \
	else \
		error = printf(f, fieldwidth, precision, func); \
} while (0)

/*
 * APF(cpp, f, func): same as PF, but formats with asprintf() into a
 * newly allocated string whose address is stored through `cpp'.
 */
#define APF(cpp, f, func) do { \
	if (fieldwidth == -1 && precision == -1) \
		error = asprintf(cpp, f, func); \
	else if (fieldwidth == -1) \
		error = asprintf(cpp, f, precision, func); \
	else if (precision == -1) \
		error = asprintf(cpp, f, fieldwidth, func); \
	else \
		error = asprintf(cpp, f, fieldwidth, precision, func); \
} while (0)
121
/* True when c is one of the octal digits '0'..'7'. */
#define isodigit(c)	('0' <= (c) && (c) <= '7')
/* Numeric value of the octal (or decimal) digit character c. */
#define octtobin(c)	((c) - '0')
/*
 * check() is deliberately only the "cond ? value" front half of a
 * conditional expression: it is completed by the ':' that hextobin()
 * writes after each use.  It matches the six letters a, a+1, ... a+5 and
 * maps them to 10..15.  hextobin() evaluates its argument several times.
 */
#define check(c, a)	(a) <= (c) && (c) <= (a) + 5 ? 10 + ((c) - (a))
#define hextobin(c)	(check(c, 'a') : check(c, 'A') : (c) - '0')
126#ifdef main
127int main(int, char *[]);
128#endif
129
130int
131main(int argc, char *argv[])
132{
133	char *fmt, *start;
134	int fieldwidth, precision;
135	char nextch;
136	char *format;
137	char ch;
138	int error;
139
140#if !defined(SHELL) && !defined(BUILTIN)
141	(void)setlocale (LC_ALL, "");
142#endif
143
144	rval = 0;	/* clear for builtin versions (avoid holdover) */
145	clearerr(stdout);	/* for the builtin version */
146
147	/*
148	 * printf does not comply with Posix XBD 12.2 - there are no opts,
149	 * not even the -- end of options marker.   Do not run getoot().
150	 */
151	if (argc > 2 && strchr(argv[1], '%') == NULL) {
152		int o;
153
154		/*
155		 * except that if there are multiple args and
156		 * the first (the nominal format) contains no '%'
157		 * conversions (which we will approximate as no '%'
158		 * characters at all, conversions or not) then the
159		 * results are unspecified, and we can do what we
160		 * like.   So in that case, for some backward compat
161		 * to scripts which (stupidly) do:
162		 *	printf -- format args
163		 * process this case the old way.
164		 */
165
166		while ((o = getopt(argc, argv, "")) != -1) {
167			switch (o) {
168			case '?':
169			default:
170				usage();
171				return 1;
172			}
173		}
174		argc -= optind;
175		argv += optind;
176	} else {
177		argc -= 1;	/* drop argv[0] (the program name) */
178		argv += 1;
179	}
180
181	if (argc < 1) {
182		usage();
183		return 1;
184	}
185
186	format = *argv;
187	gargv = ++argv;
188
189#define SKIP1	"#-+ 0'"
190#define SKIP2	"0123456789"
191	do {
192		/*
193		 * Basic algorithm is to scan the format string for conversion
194		 * specifications -- once one is found, find out if the field
195		 * width or precision is a '*'; if it is, gather up value.
196		 * Note, format strings are reused as necessary to use up the
197		 * provided arguments, arguments of zero/null string are
198		 * provided to use up the format string.
199		 */
200
201		/* find next format specification */
202		for (fmt = format; (ch = *fmt++) != '\0';) {
203			if (ch == '\\') {
204				char c_ch;
205				fmt = conv_escape(fmt, &c_ch, 0);
206				putchar(c_ch);
207				continue;
208			}
209			if (ch != '%' || (*fmt == '%' && ++fmt)) {
210				(void)putchar(ch);
211				continue;
212			}
213
214			/*
215			 * Ok - we've found a format specification,
216			 * Save its address for a later printf().
217			 */
218			start = fmt - 1;
219
220			/* skip to field width */
221			fmt += strspn(fmt, SKIP1);
222			if (*fmt == '*') {
223				fmt++;
224				fieldwidth = getwidth();
225			} else {
226				fieldwidth = -1;
227
228				/* skip to possible '.' for precision */
229				fmt += strspn(fmt, SKIP2);
230			}
231
232			if (*fmt == '.') {
233				 /* get following precision */
234				fmt++;
235				if (*fmt == '*') {
236					fmt++;
237					precision = getwidth();
238				} else {
239					precision = -1;
240					fmt += strspn(fmt, SKIP2);
241				}
242			} else
243				precision = -1;
244
245			ch = *fmt;
246			if (!ch) {
247				warnx("%s: missing format character", start);
248				return 1;
249			}
250
251			/*
252			 * null terminate format string to we can use it
253			 * as an argument to printf.
254			 */
255			nextch = fmt[1];
256			fmt[1] = 0;
257
258			switch (ch) {
259
260			case 'B': {
261				const char *p = conv_expand(getstr());
262
263				if (p == NULL)
264					goto out;
265				*fmt = 's';
266				PF(start, p);
267				if (error < 0)
268					goto out;
269				break;
270			}
271			case 'b': {
272				/*
273				 * There has to be a better way to do this,
274				 * but the string we generate might have
275				 * embedded nulls
276				 */
277				static char *a, *t;
278				char *cp = getstr();
279
280				/* Free on entry in case shell longjumped out */
281				if (a != NULL)
282					free(a);
283				a = NULL;
284				if (t != NULL)
285					free(t);
286				t = NULL;
287
288				/* Count number of bytes we want to output */
289				b_length = 0;
290				conv_escape_str(cp, b_count, 0);
291				t = malloc(b_length + 1);
292				if (t == NULL)
293					goto out;
294				(void)memset(t, 'x', b_length);
295				t[b_length] = 0;
296
297				/* Get printf to calculate the lengths */
298				*fmt = 's';
299				APF(&a, start, t);
300				if (error == -1)
301					goto out;
302				b_fmt = a;
303
304				/* Output leading spaces and data bytes */
305				conv_escape_str(cp, b_output, 1);
306
307				/* Add any trailing spaces */
308				printf("%s", b_fmt);
309				break;
310			}
311			case 'c': {
312				char p = getchr();
313
314				PF(start, p);
315				if (error < 0)
316					goto out;
317				break;
318			}
319			case 's': {
320				char *p = getstr();
321
322				PF(start, p);
323				if (error < 0)
324					goto out;
325				break;
326			}
327			case 'd':
328			case 'i': {
329				intmax_t p = getintmax();
330				char *f = mklong(start, ch);
331
332				PF(f, p);
333				if (error < 0)
334					goto out;
335				break;
336			}
337			case 'o':
338			case 'u':
339			case 'x':
340			case 'X': {
341				uintmax_t p = (uintmax_t)getintmax();
342				char *f = mklong(start, ch);
343
344				PF(f, p);
345				if (error < 0)
346					goto out;
347				break;
348			}
349			case 'a':
350			case 'A':
351			case 'e':
352			case 'E':
353			case 'f':
354			case 'F':
355			case 'g':
356			case 'G': {
357				double p = getdouble();
358
359				PF(start, p);
360				if (error < 0)
361					goto out;
362				break;
363			}
364			case '%':
365				/* Don't ask, but this is useful ... */
366				if (fieldwidth == 'N' && precision == 'B')
367					return 0;
368				/* FALLTHROUGH */
369			default:
370				warnx("%s: invalid directive", start);
371				return 1;
372			}
373			*fmt++ = ch;
374			*fmt = nextch;
375			/* escape if a \c was encountered */
376			if (rval & 0x100)
377				goto done;
378		}
379	} while (gargv != argv && *gargv);
380
381  done:
382	(void)fflush(stdout);
383	if (ferror(stdout)) {
384		clearerr(stdout);
385		err(1, "write error");
386	}
387	return rval & ~0x100;
388  out:
389	warn("print failed");
390	return 1;
391}
392
393/* helper functions for conv_escape_str */
394
395static void
396/*ARGSUSED*/
397b_count(int ch)
398{
399	b_length++;
400}
401
402/* Output one converted character for every 'x' in the 'format' */
403
404static void
405b_output(int ch)
406{
407	for (;;) {
408		switch (*b_fmt++) {
409		case 0:
410			b_fmt--;
411			return;
412		case ' ':
413			putchar(' ');
414			break;
415		default:
416			putchar(ch);
417			return;
418		}
419	}
420}
421
422
423/*
424 * Print SysV echo(1) style escape string
425 *	Halts processing string if a \c escape is encountered.
426 */
427static void
428conv_escape_str(char *str, void (*do_putchar)(int), int quiet)
429{
430	int value;
431	int ch;
432	char c;
433
434	while ((ch = *str++) != '\0') {
435		if (ch != '\\') {
436			do_putchar(ch);
437			continue;
438		}
439
440		ch = *str++;
441		if (ch == 'c') {
442			/* \c as in SYSV echo - abort all processing.... */
443			rval |= 0x100;
444			break;
445		}
446
447		/*
448		 * %b string octal constants are not like those in C.
449		 * They start with a \0, and are followed by 0, 1, 2,
450		 * or 3 octal digits.
451		 */
452		if (ch == '0') {
453			int octnum = 0, i;
454			for (i = 0; i < 3; i++) {
455				if (!isdigit((unsigned char)*str) || *str > '7')
456					break;
457				octnum = (octnum << 3) | (*str++ - '0');
458			}
459			do_putchar(octnum);
460			continue;
461		}
462
463		/* \[M][^|-]C as defined by vis(3) */
464		if (ch == 'M' && *str == '-') {
465			do_putchar(0200 | str[1]);
466			str += 2;
467			continue;
468		}
469		if (ch == 'M' && *str == '^') {
470			str++;
471			value = 0200;
472			ch = '^';
473		} else
474			value = 0;
475		if (ch == '^') {
476			ch = *str++;
477			if (ch == '?')
478				value |= 0177;
479			else
480				value |= ch & 037;
481			do_putchar(value);
482			continue;
483		}
484
485		/* Finally test for sequences valid in the format string */
486		str = conv_escape(str - 1, &c, quiet);
487		do_putchar(c);
488	}
489}
490
491/*
492 * Print "standard" escape characters
493 */
494static char *
495conv_escape(char *str, char *conv_ch, int quiet)
496{
497	int value = 0;
498	char ch, *begin;
499	int c;
500
501	ch = *str++;
502
503	switch (ch) {
504	case '\0':
505		if (!quiet)
506			warnx("incomplete escape sequence");
507		rval = 1;
508		value = '\\';
509		--str;
510		break;
511
512	case '0': case '1': case '2': case '3':
513	case '4': case '5': case '6': case '7':
514		str--;
515		for (c = 3; c-- && isodigit(*str); str++) {
516			value <<= 3;
517			value += octtobin(*str);
518		}
519		break;
520
521	case 'x':
522		/*
523		 * Hexadecimal character constants are not required to be
524		 * supported (by SuS v1) because there is no consistent
525		 * way to detect the end of the constant.
526		 * Supporting 2 byte constants is a compromise.
527		 */
528		begin = str;
529		for (c = 2; c-- && isxdigit((unsigned char)*str); str++) {
530			value <<= 4;
531			value += hextobin(*str);
532		}
533		if (str == begin) {
534			if (!quiet)
535				warnx("\\x%s: missing hexadecimal number "
536				    "in escape", begin);
537			rval = 1;
538		}
539		break;
540
541	case '\\':	value = '\\';	break;	/* backslash */
542	case '\'':	value = '\'';	break;	/* single quote */
543	case '"':	value = '"';	break;	/* double quote */
544	case 'a':	value = '\a';	break;	/* alert */
545	case 'b':	value = '\b';	break;	/* backspace */
546	case 'e':	value = ESCAPE;	break;	/* escape */
547	case 'E':	value = ESCAPE;	break;	/* escape */
548	case 'f':	value = '\f';	break;	/* form-feed */
549	case 'n':	value = '\n';	break;	/* newline */
550	case 'r':	value = '\r';	break;	/* carriage-return */
551	case 't':	value = '\t';	break;	/* tab */
552	case 'v':	value = '\v';	break;	/* vertical-tab */
553
554	default:
555		if (!quiet)
556			warnx("unknown escape sequence `\\%c'", ch);
557		rval = 1;
558		value = ch;
559		break;
560	}
561
562	*conv_ch = (char)value;
563	return str;
564}
565
566/* expand a string so that everything is printable */
567
568static char *
569conv_expand(const char *str)
570{
571	static char *conv_str;
572	char *cp;
573	char ch;
574
575	if (conv_str)
576		free(conv_str);
577	/* get a buffer that is definitely large enough.... */
578	conv_str = malloc(4 * strlen(str) + 1);
579	if (!conv_str)
580		return NULL;
581	cp = conv_str;
582
583	while ((ch = *(const char *)str++) != '\0') {
584		switch (ch) {
585		/* Use C escapes for expected control characters */
586		case '\\':	ch = '\\';	break;	/* backslash */
587		case '\'':	ch = '\'';	break;	/* single quote */
588		case '"':	ch = '"';	break;	/* double quote */
589		case '\a':	ch = 'a';	break;	/* alert */
590		case '\b':	ch = 'b';	break;	/* backspace */
591		case ESCAPE:	ch = 'e';	break;	/* escape */
592		case '\f':	ch = 'f';	break;	/* form-feed */
593		case '\n':	ch = 'n';	break;	/* newline */
594		case '\r':	ch = 'r';	break;	/* carriage-return */
595		case '\t':	ch = 't';	break;	/* tab */
596		case '\v':	ch = 'v';	break;	/* vertical-tab */
597		default:
598			/* Copy anything printable */
599			if (isprint((unsigned char)ch)) {
600				*cp++ = ch;
601				continue;
602			}
603			/* Use vis(3) encodings for the rest */
604			*cp++ = '\\';
605			if (ch & 0200) {
606				*cp++ = 'M';
607				ch &= (char)~0200;
608			}
609			if (ch == 0177) {
610				*cp++ = '^';
611				*cp++ = '?';
612				continue;
613			}
614			if (ch < 040) {
615				*cp++ = '^';
616				*cp++ = ch | 0100;
617				continue;
618			}
619			*cp++ = '-';
620			*cp++ = ch;
621			continue;
622		}
623		*cp++ = '\\';
624		*cp++ = ch;
625	}
626
627	*cp = 0;
628	return conv_str;
629}
630
631static char *
632mklong(const char *str, char ch)
633{
634	static char copy[64];
635	size_t len;
636
637	len = strlen(str) + 2;
638	if (len > sizeof copy) {
639		warnx("format \"%s\" too complex", str);
640		len = 4;
641		rval = 1;
642	}
643	(void)memmove(copy, str, len - 3);
644	copy[len - 3] = 'j';
645	copy[len - 2] = ch;
646	copy[len - 1] = '\0';
647	return copy;
648}
649
650static char
651getchr(void)
652{
653	if (!*gargv)
654		return 0;
655	return **gargv++;
656}
657
658static char *
659getstr(void)
660{
661	static char empty[] = "";
662	if (!*gargv)
663		return empty;
664	return *gargv++;
665}
666
667static int
668getwidth(void)
669{
670	unsigned long val;
671	char *s, *ep;
672
673	s = *gargv;
674	if (s == NULL)
675		return 0;
676	gargv++;
677
678	errno = 0;
679	val = strtoul(s, &ep, 0);
680	check_conversion(s, ep);
681
682	/* Arbitrarily 'restrict' field widths to 1Mbyte */
683	if (val > 1 << 20) {
684		warnx("%s: invalid field width", s);
685		return 0;
686	}
687
688	return (int)val;
689}
690
691static intmax_t
692getintmax(void)
693{
694	intmax_t val;
695	char *cp, *ep;
696
697	cp = *gargv;
698	if (cp == NULL)
699		return 0;
700	gargv++;
701
702	if (*cp == '\"' || *cp == '\'')
703		return wide_char(cp);
704
705	errno = 0;
706	val = strtoimax(cp, &ep, 0);
707	check_conversion(cp, ep);
708	return val;
709}
710
711static double
712getdouble(void)
713{
714	double val;
715	char *ep;
716
717	if (!*gargv)
718		return 0.0;
719
720	/* This is a NetBSD extension, not required by POSIX (it is useless) */
721	if (*(ep = *gargv) == '\"' || *ep == '\'')
722		return (double)wide_char(ep);
723
724	errno = 0;
725	val = strtod(*gargv, &ep);
726	check_conversion(*gargv++, ep);
727	return val;
728}
729
730/*
731 * XXX This is just a placeholder for a later version which
732 *     will do mbtowc() on p+1 (and after checking that all of the
733 *     string has been consumed) return that value.
734 *
735 * This (mbtowc) behaviour is required by POSIX (as is the check
736 * that the whole arg is consumed).
737 *
738 * What follows is actually correct if we assume that LC_CTYPE=C
739 * (or something else similar that is a single byte charset).
740 */
741static intmax_t
742wide_char(const char *p)
743{
744	intmax_t ch = (intmax_t)(unsigned char)p[1];
745
746	if (ch != 0 && p[2] != '\0') {
747		warnx("%s: not completely converted", p);
748		rval = 1;
749	}
750
751	return ch;
752}
753
754static void
755check_conversion(const char *s, const char *ep)
756{
757	if (*ep) {
758		if (ep == s)
759			warnx("%s: expected numeric value", s);
760		else
761			warnx("%s: not completely converted", s);
762		rval = 1;
763	} else if (errno == ERANGE) {
764		warnx("%s: %s", s, strerror(ERANGE));
765		rval = 1;
766	}
767}
768
/* Write the one-line usage summary, with the program name, to stderr. */
static void
usage(void)
{
	const char *progname = getprogname();

	(void)fprintf(stderr, "Usage: %s format [arg ...]\n", progname);
}
774