1// SPDX-License-Identifier: GPL-2.0+
2/*
 * Copyright (C) 2021 Weidmüller Interface GmbH & Co. KG
4 * Roland Gaudig <roland.gaudig@weidmueller.com>
5 *
6 * Copyright 1999 Dave Cinege
7 * Portions copyright (C) 1990-1996 Free Software Foundation, Inc.
8 *
9 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
10 */
11/*
12 * This file provides a shell printf like format string expansion as required
13 * for the setexpr <name> fmt <format> <value> command.
14 * This source file was mostly taken from the BusyBox project (www.busybox.net)
15 * In contrast to the original sources the output is not written to stdout
16 * anymore but into a char array, which can be used as input for the env_set()
17 * function.
18 */
19/* Usage: printf format [argument...]
20 *
21 * A front end to the printf function that lets it be used from the shell.
22 *
23 * Backslash escapes:
24 *
25 * \" = double quote
26 * \\ = backslash
27 * \a = alert (bell)
28 * \b = backspace
29 * \c = produce no further output
30 * \f = form feed
31 * \n = new line
32 * \r = carriage return
33 * \t = horizontal tab
34 * \v = vertical tab
35 * \0ooo = octal number (ooo is 0 to 3 digits)
 * \xhh = hexadecimal number (hh is 1 to 2 digits; only if WANT_HEX_ESCAPES is set)
37 *
38 * Additional directive:
39 *
40 * %b = print an argument string, interpreting backslash escapes
41 *
 * In the shell utility the 'format' argument is re-used as many times as
 * necessary to convert all of the given arguments. This port evaluates
 * 'format' only once; surplus arguments are not converted.
44 *
45 * David MacKenzie <djm@gnu.ai.mit.edu>
46 */
47/* 19990508 Busy Boxed! Dave Cinege */
48
49//config:config PRINTF
50//config:	bool "printf (3.8 kb)"
51//config:	default y
52//config:	help
53//config:	printf is used to format and print specified strings.
54//config:	It's similar to 'echo' except it has more options.
55
56//applet:IF_PRINTF(APPLET_NOFORK(printf, printf, BB_DIR_USR_BIN, BB_SUID_DROP, printf))
57
58//kbuild:lib-$(CONFIG_PRINTF) += printf.o
59//kbuild:lib-$(CONFIG_ASH_PRINTF)  += printf.o
60//kbuild:lib-$(CONFIG_HUSH_PRINTF) += printf.o
61
62//usage:#define printf_trivial_usage
63//usage:       "FORMAT [ARG]..."
64//usage:#define printf_full_usage "\n\n"
65//usage:       "Format and print ARG(s) according to FORMAT (a-la C printf)"
66//usage:
67//usage:#define printf_example_usage
68//usage:       "$ printf \"Val=%d\\n\" 5\n"
69//usage:       "Val=5\n"
70
71/* A note on bad input: neither bash 3.2 nor coreutils 6.10 stop on it.
72 * They report it:
73 *  bash: printf: XXX: invalid number
74 *  printf: XXX: expected a numeric value
75 *  bash: printf: 123XXX: invalid number
76 *  printf: 123XXX: value not completely converted
77 * but then they use 0 (or partially converted numeric prefix) as a value
78 * and continue. They exit with 1 in this case.
79 * Both accept insane field width/precision (e.g. %9999999999.9999999999d).
80 * Both print error message and assume 0 if %*.*f width/precision is "bad"
81 *  (but negative numbers are not "bad").
82 * Both accept negative numbers for %u specifier.
83 *
84 * We try to be compatible.
85 */
86
87#include <common.h>
88#include <ctype.h>
89#include <errno.h>
90#include <stddef.h>
91#include <stdio.h>
92#include <stdlib.h>
93
/* Set to 1 to compile \xhh support into process_escape_sequence() */
#define WANT_HEX_ESCAPES 0

/* Bit flags collected in the error member of struct print_inf */
#define PRINT_CONVERSION_ERROR	0x01	/* vsnprintf() returned a negative value */
#define PRINT_TRUNCATED_ERROR	0x02	/* output did not fit in the remaining space */
#define PRINT_SIZE_ERROR	0x04	/* buffer was already full before printing */
98
/**
 * struct print_inf - state of a print operation into a char array
 *
 * @str: destination buffer
 * @size: total size of @str in bytes
 * @offset: position in @str where the next output is written
 * @error: bitwise OR of the PRINT_*_ERROR flags raised so far; once this is
 *	   non-zero no further output is written
 */
struct print_inf {
	char *str;
	size_t size;
	size_t offset;
	unsigned int error;
};
105
/*
 * Numeric conversion callback: parses the string 'text' and stores the
 * value through 'out', whose pointee type is defined by the callback.
 */
typedef void (*converter)(const char *text, void *out);
107
108/**
109 * printf_str() - print formatted into char array with length checks
110 *
111 * This function povides a printf like function for printing into a char array
112 * with checking the boundaries.
113 * Unlike snprintf, all checks are performed inside this function and status
114 * reports are stored inside the print_inf struct. That way, this function can
115 * be used almost as drop-in replacement without needing much code changes.
116 * Unlike snprintf errors are not reported by return value, but inside the
117 * error member of struct print_inf. The output stored inside the struct
118 * print_inf str member shall only be used when the error member is 0.
119 *
120 * @inf: Info structure for print operation
121 * @char: format string with optional arguments
122 */
123static void printf_str(struct print_inf *inf, char *format, ...)
124{
125	va_list args;
126	int i;
127
128	if (!inf)
129		return;
130
131	/* Do not write anything if previous error is pending */
132	if (inf->error)
133		return;
134
135	/* Check if end of receiving buffer is already reached */
136	if (inf->offset >= inf->size) {
137		inf->error |= PRINT_SIZE_ERROR;
138		return;
139	}
140
141	size_t remaining = inf->size - inf->offset;
142
143	va_start(args, format);
144	i = vsnprintf(inf->str + inf->offset, remaining, format, args);
145	va_end(args);
146
147	if (i < 0)
148		inf->error |= PRINT_CONVERSION_ERROR;
149	else if ((unsigned int)i >= remaining)
150		inf->error |= PRINT_TRUNCATED_ERROR;
151	else
152		inf->offset += i;
153}
154
/**
 * putchar_str() - append a single character to a char array
 *
 * putchar() counterpart which stores its output in the char array described
 * by @inf. Boundary checks and error reporting are those of printf_str().
 *
 * @inf: Info structure for print operation
 * @c: Single character to be printed
 */
static void putchar_str(struct print_inf *inf, char c)
{
	printf_str(inf, "%c", c);
}
168
169static char process_escape_sequence(const char **ptr)
170{
171	const char *q;
172	unsigned int num_digits;
173	unsigned int n;
174	unsigned int base;
175
176	num_digits = 0;
177	n = 0;
178	base = 8;
179	q = *ptr;
180
181	if (WANT_HEX_ESCAPES && *q == 'x') {
182		++q;
183		base = 16;
184		++num_digits;
185	}
186
187	/* bash requires leading 0 in octal escapes:
188	 * \02 works, \2 does not (prints \ and 2).
189	 * We treat \2 as a valid octal escape sequence.
190	 */
191	do {
192		unsigned int r;
193		unsigned int d = (unsigned char)(*q) - '0';
194#if WANT_HEX_ESCAPES
195		if (d >= 10) {
196			d = (unsigned char)tolower(*q) - 'a';
197			//d += 10;
198			/* The above would map 'A'-'F' and 'a'-'f' to 10-15,
199			 * however, some chars like '@' would map to 9 < base.
200			 * Do not allow that, map invalid chars to N > base:
201			 */
202			if ((int)d >= 0)
203				d += 10;
204		}
205#endif
206		if (d >= base) {
207			if (WANT_HEX_ESCAPES && base == 16) {
208				--num_digits;
209				if (num_digits == 0) {
210					/* \x<bad_char>: return '\',
211					 * leave ptr pointing to x
212					 */
213					return '\\';
214				}
215			}
216			break;
217		}
218
219		r = n * base + d;
220		if (r > 255)
221			break;
222
223		n = r;
224		++q;
225	} while (++num_digits < 3);
226
227	if (num_digits == 0) {
228		/* Not octal or hex escape sequence.
229		 * Is it one-letter one?
230		 */
231		/* bash builtin "echo -e '\ec'" interprets \e as ESC,
232		 * but coreutils "/bin/echo -e '\ec'" does not.
233		 * Manpages tend to support coreutils way.
234		 * Update: coreutils added support for \e on 28 Oct 2009.
235		 */
236		static const char charmap[] = {
237			'a',  'b', 'e', 'f',  'n',  'r',  't',  'v',  '\\', '\0',
238			'\a', '\b', 27, '\f', '\n', '\r', '\t', '\v', '\\', '\\',
239		};
240
241		const char *p = charmap;
242
243		do {
244			if (*p == *q) {
245				q++;
246				break;
247			}
248		} while (*++p != '\0');
249		/* p points to found escape char or NUL,
250		 * advance it and find what it translates to.
251		 * Note that \NUL and unrecognized sequence \z return '\'
252		 * and leave ptr pointing to NUL or z.
253		 */
254		n = p[sizeof(charmap) / 2];
255	}
256
257	*ptr = q;
258
259	return (char)n;
260}
261
/*
 * Return a pointer to the first character of s that is not whitespace.
 *
 * Only the POSIX/C locale is considered, where isspace() is true for the
 * space character and for "\t\n\v\f\r". The latter are the consecutive
 * ASCII codes 9..13, so a single range test covers them.
 */
static char *skip_whitespace(const char *s)
{
	for (;;) {
		char c = *s;

		if (c != ' ' && !(c >= '\t' && c <= '\r'))
			break;
		s++;
	}

	return (char *)s;
}
275
/*
 * Like strcpy, but the strings may overlap. Bytes are copied front to
 * back, up to and including the terminating NUL.
 */
static void overlapping_strcpy(char *dst, const char *src)
{
	char c;

	/* Cheap shortcut for dst == src, kept here so that callers
	 * do not have to test for it.
	 */
	if (dst == src)
		return;

	do {
		c = *src++;
		*dst++ = c;
	} while (c != '\0');
}
289
290static int multiconvert(const char *arg, void *result, converter convert)
291{
292	if (*arg == '"' || *arg == '\'')
293		sprintf((char *)arg + strlen(arg), "%u", (unsigned char)arg[1]);
294	//errno = 0;
295	convert(arg, result);
296	/* Unlike their Posix counterparts, simple_strtoll and
297	 * simple_strtoull do not set errno
298	 *
299	 * if (errno) {
300	 *	printf("error invalid number '%s'", arg);
301	 *	return 1;
302	 * }
303	 */
304	return 0;
305}
306
/*
 * converter callback storing an unsigned long long in *result.
 * The input is parsed with base 16.
 */
static void conv_strtoull(const char *arg, void *result)
{
	unsigned long long *out = result;

	if (arg[0] == '-') {
		/* Mimic coreutils 6.10 and bash 3.2, which both accept
		 * negative numbers here:
		 * $ printf '%x\n' -2
		 * fffffffffffffffe
		 */
		*out = simple_strtoll(arg, NULL, 16);
	} else {
		/* Skip a leading '+': simple_strtoull() does not accept it
		 * itself, and probably should not (other callers might
		 * rely on purely numeric input).
		 */
		const char *digits = (arg[0] == '+') ? arg + 1 : arg;

		*out = simple_strtoull(digits, NULL, 16);
	}
}
326
/*
 * converter callback storing a long long in *result.
 * An optional leading '+' is skipped; the input is parsed with base 16.
 */
static void conv_strtoll(const char *arg, void *result)
{
	long long *out = result;
	const char *digits = (arg[0] == '+') ? arg + 1 : arg;

	*out = simple_strtoll(digits, NULL, 16);
}
333
334/* Callers should check errno to detect errors */
335static unsigned long long my_xstrtoull(const char *arg)
336{
337	unsigned long long result;
338
339	if (multiconvert(arg, &result, conv_strtoull))
340		result = 0;
341	return result;
342}
343
344static long long my_xstrtoll(const char *arg)
345{
346	long long result;
347
348	if (multiconvert(arg, &result, conv_strtoll))
349		result = 0;
350	return result;
351}
352
/*
 * Handles %b: print str while interpreting backslash escapes.
 * Return 1 if output is to be short-circuited by \c, 0 otherwise.
 */
static int print_esc_string(struct print_inf *inf, const char *str)
{
	while (*str != '\0') {
		char c = *str++;

		if (c != '\\') {
			putchar_str(inf, c);
			continue;
		}

		if (*str == 'c')
			return 1;

		/* %b also accepts 4-digit octals of the form \0###:
		 * if the char after the '0' is 0..7, skip the leading '0'
		 */
		if (*str == '0' && (unsigned char)(str[1] - '0') < 8)
			str++;

		c = process_escape_sequence(&str);
		putchar_str(inf, c);
	}

	return 0;
}
385
/*
 * Hand one value to printf_str(), preceded by the width and/or precision
 * arguments that the '*' fields of the directive consume. Exactly one of
 * the four calls is executed, so every macro argument is evaluated at most
 * once.
 */
#define PRINT_DIREC_EMIT(inf, fmt, use_width, use_prec, width, prec, value) \
	do {								\
		if ((use_width) && (use_prec))				\
			printf_str((inf), (fmt), (width), (prec), (value)); \
		else if (use_width)					\
			printf_str((inf), (fmt), (width), (value));	\
		else if (use_prec)					\
			printf_str((inf), (fmt), (prec), (value));	\
		else							\
			printf_str((inf), (fmt), (value));		\
	} while (0)

/**
 * print_direc() - print a single conversion directive
 *
 * The conversion is selected by the last character of the directive:
 * 'c' prints the first character of @argument, 'd'/'i' and 'o'/'u'/'x'/'X'
 * print @argument converted to a signed respectively unsigned number, and
 * 's' prints @argument itself. Other characters print nothing.
 *
 * @inf: Info structure for print operation
 * @format: start of the directive; format[fmt_length] is temporarily
 *	    overwritten with NUL and restored before returning
 * @fmt_length: length of the directive in characters
 * @field_width: value for a '*' field width, used only if one is present
 * @precision: value for a '*' precision, used only if one is present
 * @argument: argument string for the directive
 */
static void print_direc(struct print_inf *inf, char *format, unsigned int fmt_length,
			int field_width, int precision,
			const char *argument)
{
	unsigned long long ullv;
	long long llv;
	char saved;
	const char *star_prec, *star_width;
	int have_prec, have_width;

	saved = format[fmt_length];
	format[fmt_length] = '\0';

	star_prec = strstr(format, ".*");
	star_width = strchr(format, '*');
	have_prec = star_prec != NULL;
	/* If the first '*' is the one of ".*" there is no '*' field width */
	have_width = star_width && !(star_prec && star_width == star_prec + 1);

	/* Start from a clean errno: the caller evaluates it after this
	 * function returns.
	 */
	errno = 0;

	switch (format[fmt_length - 1]) {
	case 'c':
		PRINT_DIREC_EMIT(inf, format, have_width, have_prec,
				 field_width, precision, *argument);
		break;
	case 'd':
	case 'i':
		llv = my_xstrtoll(skip_whitespace(argument));
		PRINT_DIREC_EMIT(inf, format, have_width, have_prec,
				 field_width, precision, llv);
		break;
	case 'o':
	case 'u':
	case 'x':
	case 'X':
		ullv = my_xstrtoull(skip_whitespace(argument));
		PRINT_DIREC_EMIT(inf, format, have_width, have_prec,
				 field_width, precision, ullv);
		break;
	case 's':
		PRINT_DIREC_EMIT(inf, format, have_width, have_prec,
				 field_width, precision, argument);
		break;
	default:
		break;
	} /* switch */

	format[fmt_length] = saved;
}
463
/*
 * Parse the argument supplying a '*' field width or precision ("%*.*f").
 * The string is read as a decimal number; negative numbers are ok (compat).
 *
 * Unlike its POSIX counterpart, simple_strtol() does not set errno, so an
 * invalid number cannot be detected and reported here.
 */
static int get_width_prec(const char *str)
{
	return (int)simple_strtol(str, NULL, 10);
}
478
/**
 * print_formatted() - expand a format string into the output buffer
 *
 * Prints the text in @f, using @argv for the arguments of any '%'
 * directives. Missing arguments are treated as empty strings.
 *
 * @inf: Info structure for print operation
 * @f: format string; it is modified in place (length modifiers are removed)
 * @argv: NULL terminated list of argument strings
 * @conv_err: the errno value found after each printed directive is OR-ed
 *	      into this variable
 * Return: @argv advanced past the consumed arguments, the unchanged @argv
 *	   if the output was cut short by \c, or @argv - 1 on error
 */
static char **print_formatted(struct print_inf *inf, char *f, char **argv, int *conv_err)
{
	static const char format_chars[] = "diouxXcs";
	char **saved_argv = argv;

	for (; *f; ++f) {
		char *direc_start;          /* Start of % directive.  */
		unsigned int direc_length;  /* Length of % directive.  */
		int field_width;            /* Arg to first '*' */
		int precision;              /* Arg to second '*' */
		const char *conv;           /* Position in format_chars */
		char *ll_direc;             /* Copy of directive with "ll" */

		if (*f == '\\') {
			const char *esc;

			if (*++f == 'c')
				return saved_argv; /* causes main() to exit */
			esc = f;
			putchar_str(inf, process_escape_sequence(&esc));
			/* The loop increment steps onto the first char
			 * which is not part of the escape sequence.
			 */
			f = (char *)esc - 1;
			continue;
		}

		if (*f != '%') {
			putchar_str(inf, *f);
			continue;
		}

		direc_start = f++;
		direc_length = 1;
		field_width = 0;
		precision = 0;

		if (*f == '%') {
			putchar_str(inf, '%');
			continue;
		}

		if (*f == 'b') {
			if (*argv) {
				if (print_esc_string(inf, *argv))
					return saved_argv; /* causes main() to exit */
				++argv;
			}
			continue;
		}

		/* Flags: any number of them may be given, e.g. "%-#x" */
		while (*f && strchr("-+ #", *f)) {
			++f;
			++direc_length;
		}

		/* Field width: '*' takes it from the next argument */
		if (*f == '*') {
			++f;
			++direc_length;
			if (*argv)
				field_width = get_width_prec(*argv++);
		} else {
			while (isdigit(*f)) {
				++f;
				++direc_length;
			}
		}

		/* Precision: '*' takes it from the next argument */
		if (*f == '.') {
			++f;
			++direc_length;
			if (*f == '*') {
				++f;
				++direc_length;
				if (*argv)
					precision = get_width_prec(*argv++);
			} else {
				while (isdigit(*f)) {
					++f;
					++direc_length;
				}
			}
		}

		/* Remove "lLhz" size modifiers, repeatedly.
		 * bash does not like "%lld", but coreutils
		 * happily takes even "%Llllhhzhhzd"!
		 * We are permissive like coreutils
		 */
		while ((*f | 0x20) == 'l' || *f == 'h' || *f == 'z')
			overlapping_strcpy(f, f + 1);

		conv = strchr(format_chars, *f);
		/* strchr() also finds the terminating NUL, so the end of
		 * the format string (as in "printf %") is tested separately
		 */
		if (!conv || *f == '\0') {
			printf("`%s': invalid format\n", direc_start);
			/* causes main() to exit with error */
			return saved_argv - 1;
		}
		++direc_length;

		ll_direc = NULL;
		if (conv - format_chars <= 5) {
			/* It is one of "diouxX": print through a copy of the
			 * directive with "ll" inserted before the conversion
			 * character. Room for "ll" and the NUL is added.
			 */
			ll_direc = malloc(direc_length + 3);
			if (!ll_direc) {
				/* exit with error */
				return saved_argv - 1;
			}
			memcpy(ll_direc, direc_start, direc_length);
			ll_direc[direc_length + 1] = ll_direc[direc_length - 1];
			ll_direc[direc_length - 1] = 'l';
			ll_direc[direc_length] = 'l';
			direc_length += 2;
			/* print_direc() reads the byte after the directive */
			ll_direc[direc_length] = '\0';
			direc_start = ll_direc;
		}

		print_direc(inf, direc_start, direc_length,
			    field_width, precision, *argv ? *argv++ : "");
		*conv_err |= errno;
		free(ll_direc);
	}

	return argv;
}
599
600/**
601 * printf_setexpr() - Implements the setexpr <name> fmt <format> command
602 *
603 * This function implements the format string evaluation for the
604 * setexpr <name> fmt <format> <value> command.
605 *
606 * @str: Output string of the evaluated expression
607 * @size: Length of @str buffer
608 * @argc: Number of arguments
609 * @argv: Argument list
610 * @return: 0 if OK, 1 on error
611 */
612int printf_setexpr(char *str, size_t size, int argc, char *const *argv)
613{
614	int conv_err;
615	char *format;
616	char **argv2;
617	struct print_inf inf = {
618		.str = str,
619		.size = size,
620		.offset = 0,
621		.error = 0,
622	};
623
624	if (!str || !size)
625		return 1;
626
627	inf.str[0] = '\0';
628
629	format = argv[0];
630	argv2 = (char **)argv + 1;
631
632	conv_err = 0;
633	argv = argv2;
634	/* In case any print_str call raises an error inf.error will be
635	 * set after print_formatted returns.
636	 */
637	argv2 = print_formatted(&inf, format, (char **)argv, &conv_err);
638
639	/* coreutils compat (bash doesn't do this):
640	 *if (*argv)
641	 *	fprintf(stderr, "excess args ignored");
642	 */
643
644	return (argv2 < argv) || /* if true, print_formatted errored out */
645		conv_err || /* print_formatted saw invalid number */
646		inf.error;  /* print_str reported error */
647}
648