1#include "stdio_impl.h"
2#include <ctype.h>
3#include <errno.h>
4#include <inttypes.h>
5#include <limits.h>
6#include <stdarg.h>
7#include <string.h>
8#include <wchar.h>
9
/* Every printf flag character lies within 31 code points of the space
 * character, so a set of flags fits in one unsigned word: bit (c - ' ')
 * stands for flag character c. */

#define FLAG_BIT(c) (1U << ((c) - ' '))

#define ALT_FORM FLAG_BIT('#')
#define ZERO_PAD FLAG_BIT('0')
#define LEFT_ADJ FLAG_BIT('-')
#define PAD_POS FLAG_BIT(' ')
#define MARK_POS FLAG_BIT('+')
#define GROUPED FLAG_BIT('\'')

/* All recognised flag bits; anything outside this mask is not a flag. */
#define FLAGMASK (ALT_FORM | ZERO_PAD | LEFT_ADJ | PAD_POS | MARK_POS | GROUPED)

/* Defined when unsigned long has the same range as unsigned int, in which
 * case the long argument types are aliased to the int ones. */
#if ULONG_MAX == UINT_MAX
#define LONG_IS_INT
#endif

/* Defined unless size_t matches unsigned long and uintmax_t matches
 * unsigned long long; only then do those types need states of their own. */
#if !(SIZE_MAX == ULONG_MAX && UINTMAX_MAX == ULLONG_MAX)
#define ODD_TYPES
#endif
29
/* States and results of the length-modifier / conversion-specifier parser.
 *
 * A lookup in states[] yields 0 for an unrecognised sequence, one of the
 * prefix states (1..STOP-1) when more characters must be read, or a value
 * above STOP naming the argument type to pop. The relative order of the
 * enumerators is significant: the parser loop compares against STOP. */

enum {
    /* Prefix states; each one selects a row of states[]. */
    BARE = 0, /* no length modifier seen yet */
    LPRE,     /* "l" */
    LLPRE,    /* "ll" */
    HPRE,     /* "h" */
    HHPRE,    /* "hh" */
    BIGLPRE,  /* "L" */
    ZTPRE,    /* "z" or "t" */
    JPRE,     /* "j" */

    /* Boundary: everything above is an argument type. */
    STOP,

    PTR,
    INT,
    UINT,
    ULLONG,
#ifndef LONG_IS_INT
    LONG,
    ULONG,
#else
/* long has the range of int here, so reuse the int types. */
#define LONG INT
#define ULONG UINT
#endif
    SHORT,
    USHORT,
    CHAR,
    UCHAR,
#ifdef ODD_TYPES
    LLONG,
    SIZET,
    IMAX,
    UMAX,
    PDIFF,
    UIPTR,
#else
/* The extended types coincide with long / long long; alias them. */
#define LLONG ULLONG
#define SIZET ULONG
#define IMAX LLONG
#define UMAX ULLONG
#define PDIFF LONG
#define UIPTR ULONG
#endif
    DBL,
    LDBL,
    NOARG, /* conversion that consumes no argument (%m) */
    MAXSTATE
};
78
79#define S(x) [(x) - 'A']
80
81static const unsigned char states[]['z' - 'A' + 1] = {
82    {
83        /* 0: bare types */
84        S('d') = INT, S('i') = INT, S('o') = UINT, S('u') = UINT, S('x') = UINT,
85        S('X') = UINT, S('e') = DBL, S('f') = DBL, S('g') = DBL, S('a') = DBL,
86        S('E') = DBL, S('F') = DBL, S('G') = DBL, S('A') = DBL, S('c') = CHAR,
87        S('C') = INT, S('s') = PTR, S('S') = PTR, S('p') = UIPTR, S('n') = PTR,
88        S('m') = NOARG, S('l') = LPRE, S('h') = HPRE, S('L') = BIGLPRE, S('z') = ZTPRE,
89        S('j') = JPRE, S('t') = ZTPRE,
90    },
91    {
92        /* 1: l-prefixed */
93        S('d') = LONG, S('i') = LONG, S('o') = ULONG, S('u') = ULONG, S('x') = ULONG,
94        S('X') = ULONG, S('c') = INT, S('s') = PTR, S('n') = PTR, S('l') = LLPRE,
95    },
96    {
97        /* 2: ll-prefixed */
98        S('d') = LLONG, S('i') = LLONG, S('o') = ULLONG, S('u') = ULLONG, S('x') = ULLONG,
99        S('X') = ULLONG, S('n') = PTR,
100    },
101    {
102        /* 3: h-prefixed */
103        S('d') = SHORT, S('i') = SHORT, S('o') = USHORT, S('u') = USHORT, S('x') = USHORT,
104        S('X') = USHORT, S('n') = PTR, S('h') = HHPRE,
105    },
106    {
107        /* 4: hh-prefixed */
108        S('d') = CHAR, S('i') = CHAR, S('o') = UCHAR, S('u') = UCHAR, S('x') = UCHAR,
109        S('X') = UCHAR, S('n') = PTR,
110    },
111    {
112        /* 5: L-prefixed */
113        S('e') = LDBL, S('f') = LDBL, S('g') = LDBL, S('a') = LDBL, S('E') = LDBL, S('F') = LDBL,
114        S('G') = LDBL, S('A') = LDBL, S('n') = PTR,
115    },
116    {
117        /* 6: z- or t-prefixed (assumed to be same size) */
118        S('d') = PDIFF, S('i') = PDIFF, S('o') = SIZET, S('u') = SIZET, S('x') = SIZET,
119        S('X') = SIZET, S('n') = PTR,
120    },
121    {
122        /* 7: j-prefixed */
123        S('d') = IMAX, S('i') = IMAX, S('o') = UMAX, S('u') = UMAX, S('x') = UMAX, S('X') = UMAX,
124        S('n') = PTR,
125    }};
126
/* Nonzero when x is not in 'A'..'z', i.e. has no column in states[].
 * The unsigned subtraction wraps values below 'A' to large numbers, so a
 * single comparison covers both ends of the range. */
#define OOB(x) (!((unsigned)(x) - 'A' <= (unsigned)('z' - 'A')))
128
/* Storage for one fetched variadic argument; pop_arg() fills whichever
 * member corresponds to the argument type. */
union arg {
    uintmax_t i;   /* every integer type, widened */
    long double f; /* double and long double */
    void *p;       /* pointers */
};
134
135static void pop_arg(union arg* arg, int type, va_list* ap) {
136    /* Give the compiler a hint for optimizing the switch. */
137    if ((unsigned)type > MAXSTATE)
138        return;
139    switch (type) {
140    case PTR:
141        arg->p = va_arg(*ap, void*);
142        break;
143    case INT:
144        arg->i = va_arg(*ap, int);
145        break;
146    case UINT:
147        arg->i = va_arg(*ap, unsigned int);
148#ifndef LONG_IS_INT
149        break;
150    case LONG:
151        arg->i = va_arg(*ap, long);
152        break;
153    case ULONG:
154        arg->i = va_arg(*ap, unsigned long);
155#endif
156        break;
157    case ULLONG:
158        arg->i = va_arg(*ap, unsigned long long);
159        break;
160    case SHORT:
161        arg->i = (short)va_arg(*ap, int);
162        break;
163    case USHORT:
164        arg->i = (unsigned short)va_arg(*ap, int);
165        break;
166    case CHAR:
167        arg->i = (signed char)va_arg(*ap, int);
168        break;
169    case UCHAR:
170        arg->i = (unsigned char)va_arg(*ap, int);
171#ifdef ODD_TYPES
172        break;
173    case LLONG:
174        arg->i = va_arg(*ap, long long);
175        break;
176    case SIZET:
177        arg->i = va_arg(*ap, size_t);
178        break;
179    case IMAX:
180        arg->i = va_arg(*ap, intmax_t);
181        break;
182    case UMAX:
183        arg->i = va_arg(*ap, uintmax_t);
184        break;
185    case PDIFF:
186        arg->i = va_arg(*ap, ptrdiff_t);
187        break;
188    case UIPTR:
189        arg->i = (uintptr_t)va_arg(*ap, void*);
190#endif
191        break;
192    case DBL:
193        arg->f = va_arg(*ap, double);
194        break;
195    case LDBL:
196        arg->f = va_arg(*ap, long double);
197    }
198}
199
200static void out(FILE* f, const wchar_t* s, size_t l) {
201    while (l-- && !(f->flags & F_ERR))
202        fputwc(*s++, f);
203}
204
/* Parse a run of decimal digits at *s and advance *s past all of them.
 *
 * Returns the parsed value, 0 when *s does not start with a digit, or -1
 * when the value does not fit in an int. The bounds are checked before each
 * multiply/add so no signed overflow occurs; on overflow the remaining
 * digits are still consumed so the caller resumes after the number. */
static int getint(wchar_t** s) {
    int i = 0;

    for (; iswdigit(**s); (*s)++) {
        int digit = **s - '0';

        if (i < 0 || i > INT_MAX / 10 || digit > INT_MAX - 10 * i)
            i = -1; /* sticky: stays -1 for the rest of the run */
        else
            i = 10 * i + digit;
    }
    return i;
}
211
/* Length modifier inserted into the narrow format string that wprintf_core
 * builds for numeric conversions, indexed by the lower-cased conversion
 * letter minus 'a'. Letters without an entry map to 0. */
static const char sizeprefix['y' - 'a'] = {
    /* floating-point conversions */
    ['a' - 'a'] = 'L',
    ['e' - 'a'] = 'L',
    ['f' - 'a'] = 'L',
    ['g' - 'a'] = 'L',
    /* integer and pointer conversions */
    ['d' - 'a'] = 'j',
    ['i' - 'a'] = 'j',
    ['o' - 'a'] = 'j',
    ['u' - 'a'] = 'j',
    ['x' - 'a'] = 'j',
    ['p' - 'a'] = 'j',
};
216
217static int wprintf_core(FILE* f, const wchar_t* fmt, va_list* ap, union arg* nl_arg, int* nl_type) {
218    wchar_t *a, *z, *s = (wchar_t *)fmt;
219    unsigned l10n = 0, litpct, fl;
220    int w, p;
221    union arg arg;
222    int argpos;
223    unsigned st, ps;
224    int cnt = 0, l = 0;
225    int i;
226    int t;
227    char* bs;
228    char charfmt[16];
229    wchar_t wc;
230
231    for (;;) {
232        /* Update output count, end loop when fmt is exhausted */
233        if (cnt >= 0) {
234            if (l > INT_MAX - cnt) {
235                if (!ferror(f))
236                    errno = EOVERFLOW;
237                cnt = -1;
238            } else
239                cnt += l;
240        }
241        if (!*s)
242            break;
243
244        /* Handle literal text and %% format specifiers */
245        for (a = s; *s && *s != '%'; s++)
246            ;
247        litpct = wcsspn(s, L"%") / 2; /* Optimize %%%% runs */
248        z = s + litpct;
249        s += 2 * litpct;
250        l = z - a;
251        if (f)
252            out(f, a, l);
253        if (l)
254            continue;
255
256        if (iswdigit(s[1]) && s[2] == '$') {
257            l10n = 1;
258            argpos = s[1] - '0';
259            s += 3;
260        } else {
261            argpos = -1;
262            s++;
263        }
264
265        /* Read modifier flags */
266        for (fl = 0; (unsigned)*s - ' ' < 32 && (FLAGMASK & (1U << (*s - ' '))); s++)
267            fl |= 1U << (*s - ' ');
268
269        /* Read field width */
270        if (*s == '*') {
271            if (iswdigit(s[1]) && s[2] == '$') {
272                l10n = 1;
273                nl_type[s[1] - '0'] = INT;
274                w = nl_arg[s[1] - '0'].i;
275                s += 3;
276            } else if (!l10n) {
277                w = f ? va_arg(*ap, int) : 0;
278                s++;
279            } else
280                return -1;
281            if (w < 0)
282                fl |= LEFT_ADJ, w = -w;
283        } else if ((w = getint(&s)) < 0)
284            return -1;
285
286        /* Read precision */
287        if (*s == '.' && s[1] == '*') {
288            if (isdigit(s[2]) && s[3] == '$') {
289                nl_type[s[2] - '0'] = INT;
290                p = nl_arg[s[2] - '0'].i;
291                s += 4;
292            } else if (!l10n) {
293                p = f ? va_arg(*ap, int) : 0;
294                s += 2;
295            } else
296                return -1;
297        } else if (*s == '.') {
298            s++;
299            p = getint(&s);
300        } else
301            p = -1;
302
303        /* Format specifier state machine */
304        st = 0;
305        do {
306            if (OOB(*s))
307                return -1;
308            ps = st;
309            st = states[st] S(*s++);
310        } while (st - 1 < STOP);
311        if (!st)
312            return -1;
313
314        /* Check validity of argument type (nl/normal) */
315        if (st == NOARG) {
316            if (argpos >= 0)
317                return -1;
318        } else {
319            if (argpos >= 0)
320                nl_type[argpos] = st, arg = nl_arg[argpos];
321            else if (f)
322                pop_arg(&arg, st, ap);
323            else
324                return 0;
325        }
326
327        if (!f)
328            continue;
329        t = s[-1];
330        if (ps && (t & 15) == 3)
331            t &= ~32;
332
333        switch (t) {
334        case 'n':
335            switch (ps) {
336            case BARE:
337                *(int*)arg.p = cnt;
338                break;
339            case LPRE:
340                *(long*)arg.p = cnt;
341                break;
342            case LLPRE:
343                *(long long*)arg.p = cnt;
344                break;
345            case HPRE:
346                *(unsigned short*)arg.p = cnt;
347                break;
348            case HHPRE:
349                *(unsigned char*)arg.p = cnt;
350                break;
351            case ZTPRE:
352                *(size_t*)arg.p = cnt;
353                break;
354            case JPRE:
355                *(uintmax_t*)arg.p = cnt;
356                break;
357            }
358            continue;
359        case 'c':
360            fputwc(btowc(arg.i), f);
361            l = 1;
362            continue;
363        case 'C':
364            fputwc(arg.i, f);
365            l = 1;
366            continue;
367        case 'S':
368            a = arg.p;
369            z = wmemchr(a, 0, p);
370            if (z)
371                p = z - a;
372            if (w < p)
373                w = p;
374            if (!(fl & LEFT_ADJ))
375                fprintf(f, "%*s", w - p, "");
376            out(f, a, p);
377            if ((fl & LEFT_ADJ))
378                fprintf(f, "%*s", w - p, "");
379            l = w;
380            continue;
381        case 'm':
382            arg.p = strerror(errno);
383        case 's':
384            if (!arg.p)
385                arg.p = (char*)"(null)";
386            bs = arg.p;
387            if (p < 0)
388                p = INT_MAX;
389            for (i = l = 0; l < p && (i = mbtowc(&wc, bs, MB_LEN_MAX)) > 0; bs += i, l++)
390                ;
391            if (i < 0)
392                return -1;
393            p = l;
394            if (w < p)
395                w = p;
396            if (!(fl & LEFT_ADJ))
397                fprintf(f, "%*s", w - p, "");
398            bs = arg.p;
399            while (l--) {
400                i = mbtowc(&wc, bs, MB_LEN_MAX);
401                bs += i;
402                fputwc(wc, f);
403            }
404            if ((fl & LEFT_ADJ))
405                fprintf(f, "%*s", w - p, "");
406            l = w;
407            continue;
408        }
409
410        snprintf(charfmt, sizeof charfmt, "%%%s%s%s%s%s*.*%c%c", (fl & ALT_FORM) ? "#" : "",
411                 (fl & MARK_POS) ? "+" : "", (fl & LEFT_ADJ) ? "-" : "", (fl & PAD_POS) ? " " : "",
412                 (fl & ZERO_PAD) ? "0" : "", sizeprefix[(t | 32) - 'a'], t);
413
414        switch (t | 32) {
415        case 'a':
416        case 'e':
417        case 'f':
418        case 'g':
419            l = fprintf(f, charfmt, w, p, arg.f);
420            break;
421        case 'd':
422        case 'i':
423        case 'o':
424        case 'u':
425        case 'x':
426        case 'p':
427            l = fprintf(f, charfmt, w, p, arg.i);
428            break;
429        }
430    }
431
432    if (f)
433        return cnt;
434    if (!l10n)
435        return 0;
436
437    for (i = 1; i <= NL_ARGMAX && nl_type[i]; i++)
438        pop_arg(nl_arg + i, nl_type[i], ap);
439    for (; i <= NL_ARGMAX && !nl_type[i]; i++)
440        ;
441    if (i <= NL_ARGMAX)
442        return -1;
443    return 1;
444}
445
446int vfwprintf(FILE* restrict f, const wchar_t* restrict fmt, va_list ap) {
447    va_list ap2;
448    int nl_type[NL_ARGMAX] = {};
449    union arg nl_arg[NL_ARGMAX];
450    int olderr;
451    int ret;
452
453    /* the copy allows passing va_list* even if va_list is an array */
454    va_copy(ap2, ap);
455    if (wprintf_core(0, fmt, &ap2, nl_arg, nl_type) < 0) {
456        va_end(ap2);
457        return -1;
458    }
459
460    FLOCK(f);
461    fwide(f, 1);
462    olderr = f->flags & F_ERR;
463    f->flags &= ~F_ERR;
464    ret = wprintf_core(f, fmt, &ap2, nl_arg, nl_type);
465    if (f->flags & F_ERR)
466        ret = -1;
467    f->flags |= olderr;
468    FUNLOCK(f);
469    va_end(ap2);
470    return ret;
471}
472