1/*-
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29/*
30 * Important: This file is used both as a standalone program /usr/bin/printf
31 * and as a builtin for /bin/sh (#define SHELL).
32 */
33
34#ifndef SHELL
35#ifndef lint
36static char const copyright[] =
37"@(#) Copyright (c) 1989, 1993\n\
38	The Regents of the University of California.  All rights reserved.\n";
39#endif /* not lint */
40#endif
41
42#ifndef lint
43#if 0
44static char const sccsid[] = "@(#)printf.c	8.1 (Berkeley) 7/20/93";
45#endif
46static const char rcsid[] =
47  "$FreeBSD$";
48#endif /* not lint */
49
50#include <sys/types.h>
51
52#include <err.h>
53#include <errno.h>
54#include <inttypes.h>
55#include <limits.h>
56#include <locale.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <unistd.h>
61#include <wchar.h>
62
63#ifdef SHELL
64#define	main printfcmd
65#include "bltin/bltin.h"
66#include "error.h"
67#endif
68
/*
 * PF(f, func) --
 *	Format a single converted argument with format string `f' and write
 *	the result to stdout.
 *
 * The macro expands in the scope of its caller and reads the caller's
 * locals `havewidth', `haveprec', `fieldwidth' and `precision' to decide
 * whether '*' width and/or precision arguments must be passed ahead of
 * `func'.
 *
 * The asprintf() result is checked before the buffer is used: on failure
 * some C libraries leave the output pointer indeterminate, so testing only
 * the pointer is not sufficient.  On failure nothing is written.
 */
#define	PF(f, func) do {						\
	char *pf_buf = NULL;						\
	int pf_len;							\
	if (havewidth) {						\
		if (haveprec)						\
			pf_len = asprintf(&pf_buf, f, fieldwidth,	\
			    precision, func);				\
		else							\
			pf_len = asprintf(&pf_buf, f, fieldwidth, func); \
	} else if (haveprec)						\
		pf_len = asprintf(&pf_buf, f, precision, func);		\
	else								\
		pf_len = asprintf(&pf_buf, f, func);			\
	if (pf_len >= 0 && pf_buf != NULL) {				\
		(void)fputs(pf_buf, stdout);				\
		free(pf_buf);						\
	}								\
} while (0)
85
/* Forward declarations for the file-local helpers defined below. */
static int	 asciicode(void);
static char	*printf_doformat(char *, int *);
static int	 escape(char *, int, size_t *);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char
		*getstr(void);
static char	*mknum(char *, char);
static void	 usage(void);

/*
 * Cursor into the operand vector.  main() points it at the first operand
 * after the format string; the get*() helpers and asciicode() advance it as
 * they consume operands, and test *gargv against NULL to detect that the
 * operands are exhausted.
 */
static char **gargv;
99
100int
101main(int argc, char *argv[])
102{
103	size_t len;
104	int ch, chopped, end, rval;
105	char *format, *fmt, *start;
106
107#ifndef SHELL
108	(void) setlocale(LC_ALL, "");
109#endif
110#ifdef SHELL
111	optreset = 1; optind = 1; opterr = 0; /* initialize getopt */
112#endif
113	while ((ch = getopt(argc, argv, "")) != -1)
114		switch (ch) {
115		case '?':
116		default:
117			usage();
118			return (1);
119		}
120	argc -= optind;
121	argv += optind;
122
123	if (argc < 1) {
124		usage();
125		return (1);
126	}
127
128#ifdef SHELL
129	INTOFF;
130#endif
131	/*
132	 * Basic algorithm is to scan the format string for conversion
133	 * specifications -- once one is found, find out if the field
134	 * width or precision is a '*'; if it is, gather up value.  Note,
135	 * format strings are reused as necessary to use up the provided
136	 * arguments, arguments of zero/null string are provided to use
137	 * up the format string.
138	 */
139	fmt = format = *argv;
140	chopped = escape(fmt, 1, &len);		/* backslash interpretation */
141	rval = end = 0;
142	gargv = ++argv;
143	for (;;) {
144		start = fmt;
145		while (fmt < format + len) {
146			if (fmt[0] == '%') {
147				fwrite(start, 1, fmt - start, stdout);
148				if (fmt[1] == '%') {
149					/* %% prints a % */
150					putchar('%');
151					fmt += 2;
152				} else {
153					fmt = printf_doformat(fmt, &rval);
154					if (fmt == NULL) {
155#ifdef SHELL
156						INTON;
157#endif
158						return (1);
159					}
160					end = 0;
161				}
162				start = fmt;
163			} else
164				fmt++;
165		}
166
167		if (end == 1) {
168			warnx("missing format character");
169#ifdef SHELL
170			INTON;
171#endif
172			return (1);
173		}
174		fwrite(start, 1, fmt - start, stdout);
175		if (chopped || !*gargv) {
176#ifdef SHELL
177			INTON;
178#endif
179			return (rval);
180		}
181		/* Restart at the beginning of the format string. */
182		fmt = format;
183		end = 1;
184	}
185	/* NOTREACHED */
186}
187
188
189static char *
190printf_doformat(char *start, int *rval)
191{
192	static const char skip1[] = "#'-+ 0";
193	static const char skip2[] = "0123456789";
194	char *fmt;
195	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
196	char convch, nextch;
197
198	fmt = start + 1;
199	/* skip to field width */
200	fmt += strspn(fmt, skip1);
201	if (*fmt == '*') {
202		if (getint(&fieldwidth))
203			return (NULL);
204		havewidth = 1;
205		++fmt;
206	} else {
207		havewidth = 0;
208
209		/* skip to possible '.', get following precision */
210		fmt += strspn(fmt, skip2);
211	}
212	if (*fmt == '.') {
213		/* precision present? */
214		++fmt;
215		if (*fmt == '*') {
216			if (getint(&precision))
217				return (NULL);
218			haveprec = 1;
219			++fmt;
220		} else {
221			haveprec = 0;
222
223			/* skip to conversion char */
224			fmt += strspn(fmt, skip2);
225		}
226	} else
227		haveprec = 0;
228	if (!*fmt) {
229		warnx("missing format character");
230		return (NULL);
231	}
232
233	/*
234	 * Look for a length modifier.  POSIX doesn't have these, so
235	 * we only support them for floating-point conversions, which
236	 * are extensions.  This is useful because the L modifier can
237	 * be used to gain extra range and precision, while omitting
238	 * it is more likely to produce consistent results on different
239	 * architectures.  This is not so important for integers
240	 * because overflow is the only bad thing that can happen to
241	 * them, but consider the command  printf %a 1.1
242	 */
243	if (*fmt == 'L') {
244		mod_ldbl = 1;
245		fmt++;
246		if (!strchr("aAeEfFgG", *fmt)) {
247			warnx("bad modifier L for %%%c", *fmt);
248			return (NULL);
249		}
250	} else {
251		mod_ldbl = 0;
252	}
253
254	convch = *fmt;
255	nextch = *++fmt;
256	*fmt = '\0';
257	switch (convch) {
258	case 'b': {
259		size_t len;
260		char *p;
261		int getout;
262
263		p = strdup(getstr());
264		if (p == NULL) {
265			warnx("%s", strerror(ENOMEM));
266			return (NULL);
267		}
268		getout = escape(p, 0, &len);
269		*(fmt - 1) = 's';
270		PF(start, p);
271		*(fmt - 1) = 'b';
272		free(p);
273		if (getout)
274			return (fmt);
275		break;
276	}
277	case 'c': {
278		char p;
279
280		p = getchr();
281		PF(start, p);
282		break;
283	}
284	case 's': {
285		const char *p;
286
287		p = getstr();
288		PF(start, p);
289		break;
290	}
291	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
292		char *f;
293		intmax_t val;
294		uintmax_t uval;
295		int signedconv;
296
297		signedconv = (convch == 'd' || convch == 'i');
298		if ((f = mknum(start, convch)) == NULL)
299			return (NULL);
300		if (getnum(&val, &uval, signedconv))
301			*rval = 1;
302		if (signedconv)
303			PF(f, val);
304		else
305			PF(f, uval);
306		break;
307	}
308	case 'e': case 'E':
309	case 'f': case 'F':
310	case 'g': case 'G':
311	case 'a': case 'A': {
312		long double p;
313
314		if (getfloating(&p, mod_ldbl))
315			*rval = 1;
316		if (mod_ldbl)
317			PF(start, p);
318		else
319			PF(start, (double)p);
320		break;
321	}
322	default:
323		warnx("illegal format character %c", convch);
324		return (NULL);
325	}
326	*fmt = nextch;
327	return (fmt);
328}
329
/*
 * mknum --
 *	Build a copy of the conversion specification `str' in which the
 *	final character is replaced by 'j' followed by `ch', i.e. insert the
 *	intmax_t length modifier in front of the conversion character.
 *
 * The result lives in a function-static buffer that is grown in multiples
 * of 1024 bytes and reused by later calls, so the caller must not free it
 * and must use it before calling mknum() again.
 *
 * Returns the buffer, or NULL (after a warning) if it could not be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	char *grown, *tail;
	size_t need, prefix, rounded;

	/* Everything except the trailing conversion character is kept. */
	prefix = strlen(str) - 1;
	/* Room for the prefix, 'j', the conversion character and the NUL. */
	need = prefix + 3;

	if (need > copy_size) {
		rounded = (need + 1023) & ~(size_t)1023;
		grown = realloc(copy, rounded);
		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	memmove(copy, str, prefix);
	tail = copy + prefix;
	*tail++ = 'j';
	*tail++ = ch;
	*tail = '\0';
	return (copy);
}
356
/*
 * escape --
 *	Expand backslash escape sequences in the NUL-terminated string `fmt',
 *	in place.
 *
 * `percent' is non-zero when the string is a format: an octal escape that
 * evaluates to '%' is then stored as "%%", and octal escapes take at most
 * three digits.  When `percent' is zero, an octal escape starting with '0'
 * may take up to four digits (the leading zero plus three).
 *
 * A backslash followed by an unrecognised character yields that character;
 * a trailing lone backslash is kept as is.
 *
 * On return *len holds the number of bytes stored, not counting the NUL
 * that is always appended.  Returns 1 if "\c" was encountered (the string
 * is cut at that point), 0 otherwise.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	char *in, *out;
	int digits, value;

	in = out = fmt;
	while (*in != '\0') {
		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		++in;			/* step over the backslash */
		switch (*in) {
		case '\0':		/* EOS, user error */
			*out++ = '\\';
			*out = '\0';
			*len = out - fmt;
			return (0);
		case 'c':		/* discard the rest */
			*out = '\0';
			*len = out - fmt;
			return (1);
		case 'a':		/* bell/alert */
			*out++ = '\a';
			++in;
			break;
		case 'b':		/* backspace */
			*out++ = '\b';
			++in;
			break;
		case 'f':		/* form-feed */
			*out++ = '\f';
			++in;
			break;
		case 'n':		/* newline */
			*out++ = '\n';
			++in;
			break;
		case 'r':		/* carriage-return */
			*out++ = '\r';
			++in;
			break;
		case 't':		/* horizontal tab */
			*out++ = '\t';
			++in;
			break;
		case 'v':		/* vertical tab */
			*out++ = '\v';
			++in;
			break;
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
					/* octal constant */
			digits = (!percent && *in == '0') ? 4 : 3;
			value = 0;
			while (digits-- > 0 && *in >= '0' && *in <= '7') {
				value = (value << 3) + (*in - '0');
				++in;
			}
			if (percent && value == '%') {
				/* Keep a literal '%' literal in a format. */
				*out++ = '%';
				*out++ = '%';
			} else
				*out++ = (char)value;
			break;
		default:		/* backslash, quote, anything else */
			*out++ = *in++;
			break;
		}
	}
	*out = '\0';
	*len = out - fmt;
	return (0);
}
428
429static int
430getchr(void)
431{
432	if (!*gargv)
433		return ('\0');
434	return ((int)**gargv++);
435}
436
437static const char *
438getstr(void)
439{
440	if (!*gargv)
441		return ("");
442	return (*gargv++);
443}
444
445static int
446getint(int *ip)
447{
448	intmax_t val;
449	uintmax_t uval;
450	int rval;
451
452	if (getnum(&val, &uval, 1))
453		return (1);
454	rval = 0;
455	if (val < INT_MIN || val > INT_MAX) {
456		warnx("%s: %s", *gargv, strerror(ERANGE));
457		rval = 1;
458	}
459	*ip = (int)val;
460	return (rval);
461}
462
463static int
464getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
465{
466	char *ep;
467	int rval;
468
469	if (!*gargv) {
470		*ip = *uip = 0;
471		return (0);
472	}
473	if (**gargv == '"' || **gargv == '\'') {
474		if (signedconv)
475			*ip = asciicode();
476		else
477			*uip = asciicode();
478		return (0);
479	}
480	rval = 0;
481	errno = 0;
482	if (signedconv)
483		*ip = strtoimax(*gargv, &ep, 0);
484	else
485		*uip = strtoumax(*gargv, &ep, 0);
486	if (ep == *gargv) {
487		warnx("%s: expected numeric value", *gargv);
488		rval = 1;
489	}
490	else if (*ep != '\0') {
491		warnx("%s: not completely converted", *gargv);
492		rval = 1;
493	}
494	if (errno == ERANGE) {
495		warnx("%s: %s", *gargv, strerror(ERANGE));
496		rval = 1;
497	}
498	++gargv;
499	return (rval);
500}
501
502static int
503getfloating(long double *dp, int mod_ldbl)
504{
505	char *ep;
506	int rval;
507
508	if (!*gargv) {
509		*dp = 0.0;
510		return (0);
511	}
512	if (**gargv == '"' || **gargv == '\'') {
513		*dp = asciicode();
514		return (0);
515	}
516	rval = 0;
517	errno = 0;
518	if (mod_ldbl)
519		*dp = strtold(*gargv, &ep);
520	else
521		*dp = strtod(*gargv, &ep);
522	if (ep == *gargv) {
523		warnx("%s: expected numeric value", *gargv);
524		rval = 1;
525	} else if (*ep != '\0') {
526		warnx("%s: not completely converted", *gargv);
527		rval = 1;
528	}
529	if (errno == ERANGE) {
530		warnx("%s: %s", *gargv, strerror(ERANGE));
531		rval = 1;
532	}
533	++gargv;
534	return (rval);
535}
536
537static int
538asciicode(void)
539{
540	int ch;
541	wchar_t wch;
542	mbstate_t mbs;
543
544	ch = (unsigned char)**gargv;
545	if (ch == '\'' || ch == '"') {
546		memset(&mbs, 0, sizeof(mbs));
547		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
548		case (size_t)-2:
549		case (size_t)-1:
550			wch = (unsigned char)gargv[0][1];
551			break;
552		case 0:
553			wch = 0;
554			break;
555		}
556		ch = wch;
557	}
558	++gargv;
559	return (ch);
560}
561
/*
 * usage --
 *	Write the one-line synopsis to stderr.  Does not exit; the caller
 *	decides what to return.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: printf format [arguments ...]\n";

	(void)fputs(synopsis, stderr);
}
567