1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
5 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
6 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
7 * Copyright (c) 1989, 1993
8 *	The Regents of the University of California.  All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34/*
35 * Important: This file is used both as a standalone program /usr/bin/printf
36 * and as a builtin for /bin/sh (#define SHELL).
37 */
38
39#include <sys/types.h>
40
41#include <ctype.h>
42#include <err.h>
43#include <errno.h>
44#include <inttypes.h>
45#include <limits.h>
46#include <locale.h>
47#include <stdio.h>
48#include <stdlib.h>
49#include <string.h>
50#include <unistd.h>
51#include <wchar.h>
52
53#ifdef SHELL
54#define	main printfcmd
55#include "bltin/bltin.h"
56#include "options.h"
57#endif
58
/*
 * Emit one converted value with printf(3), passing along whichever of the
 * '*' field width and '*' precision arguments were collected for the
 * current conversion.  Relies on the caller's locals havewidth, haveprec,
 * fieldwidth and precision being in scope.
 */
#define	PF(f, func) do {						\
	if (havewidth && haveprec)					\
		(void)printf(f, fieldwidth, precision, func);		\
	else if (havewidth)						\
		(void)printf(f, fieldwidth, func);			\
	else if (haveprec)						\
		(void)printf(f, precision, func);			\
	else								\
		(void)printf(f, func);					\
} while (0)
70
/* Helpers that consume command-line arguments through gargv. */
static int	 asciicode(void);
static int	 getchr(void);
static int	 getfloating(long double *dp, int mod_ldbl);
static int	 getint(int *ip);
static int	 getnum(intmax_t *ip, uintmax_t *uip, int signedconv);
static const char
		*getstr(void);

/* Format-string processing. */
static char	*printf_doformat(char *fmt, int *rval);
static int	 escape(char *fmt, int percent, size_t *len);
static char	*mknum(char *str, char ch);

static void	 usage(void);

/* Character set used with strspn() to recognise "n$" argument indexes. */
static const char digits[] = "0123456789";

/* Sentinel returned by printf_doformat() when a %b argument contained \c. */
static char end_fmt[1];

/* Arguments available to the current pass over the format string. */
static int  myargc;
static char **myargv;

/* Next argument to be consumed. */
static char **gargv;

/* Furthest argument position reached during the current pass. */
static char **maxargv;
91
92int
93main(int argc, char *argv[])
94{
95	size_t len;
96	int end, rval;
97	char *format, *fmt, *start;
98#ifndef SHELL
99	int ch;
100
101	(void) setlocale(LC_ALL, "");
102#endif
103
104#ifdef SHELL
105	nextopt("");
106	argc -= argptr - argv;
107	argv = argptr;
108#else
109	while ((ch = getopt(argc, argv, "")) != -1)
110		switch (ch) {
111		case '?':
112		default:
113			usage();
114			return (1);
115		}
116	argc -= optind;
117	argv += optind;
118#endif
119
120	if (argc < 1) {
121		usage();
122		return (1);
123	}
124
125#ifdef SHELL
126	INTOFF;
127#endif
128	/*
129	 * Basic algorithm is to scan the format string for conversion
130	 * specifications -- once one is found, find out if the field
131	 * width or precision is a '*'; if it is, gather up value.  Note,
132	 * format strings are reused as necessary to use up the provided
133	 * arguments, arguments of zero/null string are provided to use
134	 * up the format string.
135	 */
136	fmt = format = *argv;
137	escape(fmt, 1, &len);		/* backslash interpretation */
138	rval = end = 0;
139	gargv = ++argv;
140
141	for (;;) {
142		maxargv = gargv;
143
144		myargv = gargv;
145		for (myargc = 0; gargv[myargc]; myargc++)
146			/* nop */;
147		start = fmt;
148		while (fmt < format + len) {
149			if (fmt[0] == '%') {
150				fwrite(start, 1, fmt - start, stdout);
151				if (fmt[1] == '%') {
152					/* %% prints a % */
153					putchar('%');
154					fmt += 2;
155				} else {
156					fmt = printf_doformat(fmt, &rval);
157					if (fmt == NULL || fmt == end_fmt) {
158#ifdef SHELL
159						INTON;
160#endif
161						return (fmt == NULL ? 1 : rval);
162					}
163					end = 0;
164				}
165				start = fmt;
166			} else
167				fmt++;
168			if (gargv > maxargv)
169				maxargv = gargv;
170		}
171		gargv = maxargv;
172
173		if (end == 1) {
174			warnx("missing format character");
175#ifdef SHELL
176			INTON;
177#endif
178			return (1);
179		}
180		fwrite(start, 1, fmt - start, stdout);
181		if (!*gargv) {
182#ifdef SHELL
183			INTON;
184#endif
185			return (rval);
186		}
187		/* Restart at the beginning of the format string. */
188		fmt = format;
189		end = 1;
190	}
191	/* NOTREACHED */
192}
193
194
195static char *
196printf_doformat(char *fmt, int *rval)
197{
198	static const char skip1[] = "#'-+ 0";
199	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
200	char convch, nextch;
201	char start[strlen(fmt) + 1];
202	char **fargv;
203	char *dptr;
204	int l;
205
206	dptr = start;
207	*dptr++ = '%';
208	*dptr = 0;
209
210	fmt++;
211
212	/* look for "n$" field index specifier */
213	l = strspn(fmt, digits);
214	if ((l > 0) && (fmt[l] == '$')) {
215		int idx = atoi(fmt);
216		if (idx <= myargc) {
217			gargv = &myargv[idx - 1];
218		} else {
219			gargv = &myargv[myargc];
220		}
221		if (gargv > maxargv)
222			maxargv = gargv;
223		fmt += l + 1;
224
225		/* save format argument */
226		fargv = gargv;
227	} else {
228		fargv = NULL;
229	}
230
231	/* skip to field width */
232	while (*fmt && strchr(skip1, *fmt) != NULL) {
233		*dptr++ = *fmt++;
234		*dptr = 0;
235	}
236
237	if (*fmt == '*') {
238
239		fmt++;
240		l = strspn(fmt, digits);
241		if ((l > 0) && (fmt[l] == '$')) {
242			int idx = atoi(fmt);
243			if (fargv == NULL) {
244				warnx("incomplete use of n$");
245				return (NULL);
246			}
247			if (idx <= myargc) {
248				gargv = &myargv[idx - 1];
249			} else {
250				gargv = &myargv[myargc];
251			}
252			fmt += l + 1;
253		} else if (fargv != NULL) {
254			warnx("incomplete use of n$");
255			return (NULL);
256		}
257
258		if (getint(&fieldwidth))
259			return (NULL);
260		if (gargv > maxargv)
261			maxargv = gargv;
262		havewidth = 1;
263
264		*dptr++ = '*';
265		*dptr = 0;
266	} else {
267		havewidth = 0;
268
269		/* skip to possible '.', get following precision */
270		while (isdigit(*fmt)) {
271			*dptr++ = *fmt++;
272			*dptr = 0;
273		}
274	}
275
276	if (*fmt == '.') {
277		/* precision present? */
278		fmt++;
279		*dptr++ = '.';
280
281		if (*fmt == '*') {
282
283			fmt++;
284			l = strspn(fmt, digits);
285			if ((l > 0) && (fmt[l] == '$')) {
286				int idx = atoi(fmt);
287				if (fargv == NULL) {
288					warnx("incomplete use of n$");
289					return (NULL);
290				}
291				if (idx <= myargc) {
292					gargv = &myargv[idx - 1];
293				} else {
294					gargv = &myargv[myargc];
295				}
296				fmt += l + 1;
297			} else if (fargv != NULL) {
298				warnx("incomplete use of n$");
299				return (NULL);
300			}
301
302			if (getint(&precision))
303				return (NULL);
304			if (gargv > maxargv)
305				maxargv = gargv;
306			haveprec = 1;
307			*dptr++ = '*';
308			*dptr = 0;
309		} else {
310			haveprec = 0;
311
312			/* skip to conversion char */
313			while (isdigit(*fmt)) {
314				*dptr++ = *fmt++;
315				*dptr = 0;
316			}
317		}
318	} else
319		haveprec = 0;
320	if (!*fmt) {
321		warnx("missing format character");
322		return (NULL);
323	}
324	*dptr++ = *fmt;
325	*dptr = 0;
326
327	/*
328	 * Look for a length modifier.  POSIX doesn't have these, so
329	 * we only support them for floating-point conversions, which
330	 * are extensions.  This is useful because the L modifier can
331	 * be used to gain extra range and precision, while omitting
332	 * it is more likely to produce consistent results on different
333	 * architectures.  This is not so important for integers
334	 * because overflow is the only bad thing that can happen to
335	 * them, but consider the command  printf %a 1.1
336	 */
337	if (*fmt == 'L') {
338		mod_ldbl = 1;
339		fmt++;
340		if (!strchr("aAeEfFgG", *fmt)) {
341			warnx("bad modifier L for %%%c", *fmt);
342			return (NULL);
343		}
344	} else {
345		mod_ldbl = 0;
346	}
347
348	/* save the current arg offset, and set to the format arg */
349	if (fargv != NULL) {
350		gargv = fargv;
351	}
352
353	convch = *fmt;
354	nextch = *++fmt;
355
356	*fmt = '\0';
357	switch (convch) {
358	case 'b': {
359		size_t len;
360		char *p;
361		int getout;
362
363		/* Convert "b" to "s" for output. */
364		start[strlen(start) - 1] = 's';
365		if ((p = strdup(getstr())) == NULL) {
366			warnx("%s", strerror(ENOMEM));
367			return (NULL);
368		}
369		getout = escape(p, 0, &len);
370		PF(start, p);
371		/* Restore format for next loop. */
372
373		free(p);
374		if (getout)
375			return (end_fmt);
376		break;
377	}
378	case 'c': {
379		char p;
380
381		p = getchr();
382		if (p != '\0')
383			PF(start, p);
384		break;
385	}
386	case 's': {
387		const char *p;
388
389		p = getstr();
390		PF(start, p);
391		break;
392	}
393	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
394		char *f;
395		intmax_t val;
396		uintmax_t uval;
397		int signedconv;
398
399		signedconv = (convch == 'd' || convch == 'i');
400		if ((f = mknum(start, convch)) == NULL)
401			return (NULL);
402		if (getnum(&val, &uval, signedconv))
403			*rval = 1;
404		if (signedconv)
405			PF(f, val);
406		else
407			PF(f, uval);
408		break;
409	}
410	case 'e': case 'E':
411	case 'f': case 'F':
412	case 'g': case 'G':
413	case 'a': case 'A': {
414		long double p;
415
416		if (getfloating(&p, mod_ldbl))
417			*rval = 1;
418		if (mod_ldbl)
419			PF(start, p);
420		else
421			PF(start, (double)p);
422		break;
423	}
424	default:
425		warnx("illegal format character %c", convch);
426		return (NULL);
427	}
428	*fmt = nextch;
429	/* return the gargv to the next element */
430	return (fmt);
431}
432
/*
 * Build a copy of the conversion specification str with the 'j' length
 * modifier inserted in front of the conversion character ch, so that the
 * result can be used with intmax_t/uintmax_t values.  The last character
 * of str is replaced by 'j' followed by ch.
 *
 * The result lives in a static buffer that is grown on demand in 1 KiB
 * steps and reused by subsequent calls.  Returns NULL, after printing a
 * diagnostic, if the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	const size_t need = strlen(str) + 2;	/* extra 'j' plus the NUL */

	if (need > copy_size) {
		const size_t rounded = ((need + 1023) >> 10) << 10;
		char *grown = realloc(copy, rounded);

		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	/* Everything except the original conversion character... */
	memmove(copy, str, need - 3);
	/* ...followed by the modifier, the conversion and the terminator. */
	copy[need - 3] = 'j';
	copy[need - 2] = ch;
	copy[need - 1] = '\0';
	return (copy);
}
458
/*
 * Expand backslash escape sequences in fmt, in place.
 *
 * percent is non-zero when fmt is a format string: an octal escape that
 * yields '%' is then stored as "%%", and "\c" is stored as a plain 'c'.
 * When percent is zero (a %b argument), "\c" truncates the string and an
 * octal escape starting with '0' may have up to four digits instead of
 * three.
 *
 * The length of the expanded text is stored in *len.  Returns 1 if the
 * string was cut short by "\c", 0 otherwise.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *const base = fmt;
	char *out = fmt;	/* write position; never ahead of in */
	const char *in = fmt;	/* read position */

	while (*in != '\0') {
		int maxdigits, value;

		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		in++;			/* step over the backslash */
		switch (*in) {
		case '\0':		/* EOS, user error */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		case 'a':		/* bell/alert */
			*out++ = '\a';
			in++;
			break;
		case 'b':		/* backspace */
			*out++ = '\b';
			in++;
			break;
		case 'c':
			if (!percent) {
				/* stop all output here */
				*out = '\0';
				*len = out - base;
				return (1);
			}
			*out++ = 'c';
			in++;
			break;
		case 'f':		/* form-feed */
			*out++ = '\f';
			in++;
			break;
		case 'n':		/* newline */
			*out++ = '\n';
			in++;
			break;
		case 'r':		/* carriage-return */
			*out++ = '\r';
			in++;
			break;
		case 't':		/* horizontal tab */
			*out++ = '\t';
			in++;
			break;
		case 'v':		/* vertical tab */
			*out++ = '\v';
			in++;
			break;
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
			/* octal constant */
			maxdigits = (!percent && *in == '0') ? 4 : 3;
			value = 0;
			while (maxdigits-- > 0 && *in >= '0' && *in <= '7') {
				value = (value << 3) + (*in - '0');
				in++;
			}
			if (percent && value == '%') {
				/* keep a literal percent sign literal */
				*out++ = '%';
				*out++ = '%';
			} else {
				*out++ = (char)value;
			}
			break;
		default:		/* backslash, quote, anything else */
			*out++ = *in++;
			break;
		}
	}
	*out = '\0';
	*len = out - base;
	return (0);
}
534
535static int
536getchr(void)
537{
538	if (!*gargv)
539		return ('\0');
540	return ((int)**gargv++);
541}
542
543static const char *
544getstr(void)
545{
546	if (!*gargv)
547		return ("");
548	return (*gargv++);
549}
550
551static int
552getint(int *ip)
553{
554	intmax_t val;
555	uintmax_t uval;
556	int rval;
557
558	if (getnum(&val, &uval, 1))
559		return (1);
560	rval = 0;
561	if (val < INT_MIN || val > INT_MAX) {
562		warnx("%s: %s", *gargv, strerror(ERANGE));
563		rval = 1;
564	}
565	*ip = (int)val;
566	return (rval);
567}
568
569static int
570getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
571{
572	char *ep;
573	int rval;
574
575	if (!*gargv) {
576		*ip = *uip = 0;
577		return (0);
578	}
579	if (**gargv == '"' || **gargv == '\'') {
580		if (signedconv)
581			*ip = asciicode();
582		else
583			*uip = asciicode();
584		return (0);
585	}
586	rval = 0;
587	errno = 0;
588	if (signedconv)
589		*ip = strtoimax(*gargv, &ep, 0);
590	else
591		*uip = strtoumax(*gargv, &ep, 0);
592	if (ep == *gargv) {
593		warnx("%s: expected numeric value", *gargv);
594		rval = 1;
595	}
596	else if (*ep != '\0') {
597		warnx("%s: not completely converted", *gargv);
598		rval = 1;
599	}
600	if (errno == ERANGE) {
601		warnx("%s: %s", *gargv, strerror(ERANGE));
602		rval = 1;
603	}
604	++gargv;
605	return (rval);
606}
607
608static int
609getfloating(long double *dp, int mod_ldbl)
610{
611	char *ep;
612	int rval;
613
614	if (!*gargv) {
615		*dp = 0.0;
616		return (0);
617	}
618	if (**gargv == '"' || **gargv == '\'') {
619		*dp = asciicode();
620		return (0);
621	}
622	rval = 0;
623	errno = 0;
624	if (mod_ldbl)
625		*dp = strtold(*gargv, &ep);
626	else
627		*dp = strtod(*gargv, &ep);
628	if (ep == *gargv) {
629		warnx("%s: expected numeric value", *gargv);
630		rval = 1;
631	} else if (*ep != '\0') {
632		warnx("%s: not completely converted", *gargv);
633		rval = 1;
634	}
635	if (errno == ERANGE) {
636		warnx("%s: %s", *gargv, strerror(ERANGE));
637		rval = 1;
638	}
639	++gargv;
640	return (rval);
641}
642
643static int
644asciicode(void)
645{
646	int ch;
647	wchar_t wch;
648	mbstate_t mbs;
649
650	ch = (unsigned char)**gargv;
651	if (ch == '\'' || ch == '"') {
652		memset(&mbs, 0, sizeof(mbs));
653		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
654		case (size_t)-2:
655		case (size_t)-1:
656			wch = (unsigned char)gargv[0][1];
657			break;
658		case 0:
659			wch = 0;
660			break;
661		}
662		ch = wch;
663	}
664	++gargv;
665	return (ch);
666}
667
/* Print the command synopsis on standard error. */
static void
usage(void)
{
	(void)fputs("usage: printf format [arguments ...]\n", stderr);
}
673