1#include <ctype.h>
2#include <limits.h>
3#include <stdarg.h>
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <wchar.h>
8#include <wctype.h>
9
10#include "floatscan.h"
11#include "intscan.h"
12#include "libc.h"
13#include "shgetc.h"
14#include "stdio_impl.h"
15
/*
 * Codes for the length modifier of a conversion specification.
 * vfwscanf uses (code + 2) as an index into its table of modifier
 * strings, so the values must stay contiguous starting at -2.
 */
#define SIZE_hh -2
#define SIZE_h -1
#define SIZE_def 0
#define SIZE_l 1
#define SIZE_L 2
#define SIZE_ll 3

/*
 * Store the integer i through dest, using the object type selected by
 * the length-modifier code in size.
 *
 * dest: destination object, or NULL to discard the value.
 * size: one of the SIZE_* codes; codes without an integer mapping
 *       (SIZE_L, or anything unknown) store nothing.
 * i:    value to store; it is converted to the destination type.
 */
static void store_int(void* dest, int size, unsigned long long i) {
    if (dest == NULL) {
        return;
    }

    if (size == SIZE_hh) {
        *(char*)dest = (char)i;
    } else if (size == SIZE_h) {
        *(short*)dest = (short)i;
    } else if (size == SIZE_def) {
        *(int*)dest = (int)i;
    } else if (size == SIZE_l) {
        *(long*)dest = (long)i;
    } else if (size == SIZE_ll) {
        *(long long*)dest = (long long)i;
    }
    /* Every other code deliberately falls through without a store. */
}
44
/*
 * Fetch the n-th pointer argument (counting from 1) of the argument
 * list ap, for positional "%n$" conversions.
 *
 * The walk is done on a private copy, so ap itself is not advanced.
 * Every argument is read as a void*. A value of n below 2 yields the
 * first argument.
 */
static void* arg_n(va_list ap, unsigned int n) {
    va_list scan;
    void* result;

    va_copy(scan, ap);
    /* Skip the n-1 arguments that come before the requested one. */
    while (n > 1) {
        (void)va_arg(scan, void*);
        n--;
    }
    result = va_arg(scan, void*);
    va_end(scan);

    return result;
}
56
/*
 * Test whether the character c belongs to a scanset.
 *
 * set points at the first member of the scanset; the set ends at the
 * first ']' that is not in the leading position, or at the terminating
 * null. A leading '-' or ']' is an ordinary member. Elsewhere, "a-b"
 * denotes the range from a up to and including b, unless the '-' is the
 * last character before the end of the set, in which case it is literal.
 *
 * Returns 1 if c is a member, 0 otherwise.
 */
static int in_set(const wchar_t* set, int c) {
    const wchar_t* cur = set;

    /* A '-' or ']' in first position stands for itself. */
    if (*cur == '-') {
        if (c == '-') {
            return 1;
        }
        cur++;
    } else if (*cur == ']') {
        if (c == ']') {
            return 1;
        }
        cur++;
    }

    while (*cur && *cur != ']') {
        if (*cur == '-' && cur[1] && cur[1] != ']') {
            /* Range: walk from the character before '-' up to, but not
             * including, the one after it; the upper bound itself is
             * checked by the ordinary comparison below. */
            int member = cur[-1];
            cur++;
            while (member < *cur) {
                if (c == member) {
                    return 1;
                }
                member++;
            }
        }
        if (c == *cur) {
            return 1;
        }
        cur++;
    }

    return 0;
}
79
#if 1
/*
 * Fast paths for reading and pushing back single-byte characters
 * directly in the stream's read buffer.
 *
 * getwc: when the buffer still holds data and the next byte is below
 * 128, consume that byte directly; otherwise call the real getwc
 * function (the parenthesised name prevents macro expansion).
 */
#undef getwc
#define getwc(stream)                                                        \
    (((stream)->rpos < (stream)->rend && *(stream)->rpos < 128)              \
         ? *(stream)->rpos++                                                 \
         : (getwc)(stream))

/*
 * ungetwc: when the stream has a read buffer and the character is below
 * 128 (the unsigned comparison is false for a negative int), step the
 * read position back by one byte; otherwise call the real ungetwc
 * function (a macro is not re-expanded inside its own expansion).
 */
#undef ungetwc
#define ungetwc(wc, stream)                                                  \
    (((stream)->rend && (wc) < 128U) ? *--(stream)->rpos                     \
                                     : ungetwc((wc), (stream)))
#endif
87
88int vfwscanf(FILE* restrict f, const wchar_t* restrict fmt, va_list ap) {
89    int width;
90    int size;
91    int alloc;
92    const wchar_t* p;
93    int c = 0, t;
94    char* s = NULL;
95    wchar_t* wcs = NULL;
96    void* dest = NULL;
97    int invert;
98    int matches = 0;
99    off_t pos = 0, cnt;
100    static const char size_pfx[][3] = {"hh", "h", "", "l", "L", "ll"};
101    char tmp[3 * sizeof(int) + 10];
102    const wchar_t* set;
103    size_t i, k = 0u;
104
105    FLOCK(f);
106
107    fwide(f, 1);
108
109    for (p = fmt; *p; p++) {
110
111        alloc = 0;
112
113        if (iswspace(*p)) {
114            while (iswspace(p[1]))
115                p++;
116            while (iswspace((c = getwc(f))))
117                pos++;
118            ungetwc(c, f);
119            continue;
120        }
121        if (*p != '%' || p[1] == '%') {
122            p += *p == '%';
123            c = getwc(f);
124            if (c != *p) {
125                ungetwc(c, f);
126                if (c < 0)
127                    goto input_fail;
128                goto match_fail;
129            }
130            pos++;
131            continue;
132        }
133
134        p++;
135        if (*p == '*') {
136            dest = 0;
137            p++;
138        } else if (iswdigit(*p) && p[1] == '$') {
139            dest = arg_n(ap, *p - '0');
140            p += 2;
141        } else {
142            dest = va_arg(ap, void*);
143        }
144
145        for (width = 0; iswdigit(*p); p++) {
146            width = 10 * width + *p - '0';
147        }
148
149        if (*p == 'm') {
150            alloc = !!dest;
151            p++;
152        } else {
153            alloc = 0;
154        }
155
156        size = SIZE_def;
157        switch (*p++) {
158        case 'h':
159            if (*p == 'h')
160                p++, size = SIZE_hh;
161            else
162                size = SIZE_h;
163            break;
164        case 'l':
165            if (*p == 'l')
166                p++, size = SIZE_ll;
167            else
168                size = SIZE_l;
169            break;
170        case 'j':
171            size = SIZE_ll;
172            break;
173        case 'z':
174        case 't':
175            size = SIZE_l;
176            break;
177        case 'L':
178            size = SIZE_L;
179            break;
180        case 'd':
181        case 'i':
182        case 'o':
183        case 'u':
184        case 'x':
185        case 'a':
186        case 'e':
187        case 'f':
188        case 'g':
189        case 'A':
190        case 'E':
191        case 'F':
192        case 'G':
193        case 'X':
194        case 's':
195        case 'c':
196        case '[':
197        case 'S':
198        case 'C':
199        case 'p':
200        case 'n':
201            p--;
202            break;
203        default:
204            goto fmt_fail;
205        }
206
207        t = *p;
208
209        /* Transform S,C -> ls,lc */
210        if ((t & 0x2f) == 3) {
211            size = SIZE_l;
212            t |= 32;
213        }
214
215        if (t != 'n') {
216            if (t != '[' && (t | 32) != 'c')
217                while (iswspace((c = getwc(f))))
218                    pos++;
219            else
220                c = getwc(f);
221            if (c < 0)
222                goto input_fail;
223            ungetwc(c, f);
224        }
225
226        switch (t) {
227        case 'n':
228            store_int(dest, size, pos);
229            /* do not increment match count, etc! */
230            continue;
231
232        case 's':
233        case 'c':
234        case '[':
235            if (t == 'c') {
236                if (width < 1)
237                    width = 1;
238                invert = 1;
239                set = L"";
240            } else if (t == 's') {
241                invert = 1;
242                set = (const wchar_t[]){' ', '\t', '\n', '\r', 11, 12,
243                                        0x0085, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
244                                        0x2005, 0x2006, 0x2008, 0x2009, 0x200a, 0x2028,
245                                        0x2029, 0x205f, 0x3000, 0};
246            } else {
247                if (*++p == '^')
248                    p++, invert = 1;
249                else
250                    invert = 0;
251                set = p;
252                if (*p == ']')
253                    p++;
254                while (*p != ']') {
255                    if (!*p)
256                        goto fmt_fail;
257                    p++;
258                }
259            }
260
261            s = (size == SIZE_def) ? dest : 0;
262            wcs = (size == SIZE_l) ? dest : 0;
263
264            int gotmatch = 0;
265
266            if (width < 1)
267                width = -1;
268
269            i = 0;
270            if (alloc) {
271                k = t == 'c' ? width + 1U : 31;
272                if (size == SIZE_l) {
273                    wcs = malloc(k * sizeof(wchar_t));
274                    if (!wcs)
275                        goto alloc_fail;
276                } else {
277                    s = malloc(k);
278                    if (!s)
279                        goto alloc_fail;
280                }
281            }
282            while (width) {
283                if ((c = getwc(f)) < 0)
284                    break;
285                if (in_set(set, c) == invert)
286                    break;
287                if (wcs) {
288                    wcs[i++] = c;
289                    if (alloc && i == k) {
290                        k += k + 1;
291                        wchar_t* tmp = realloc(wcs, k * sizeof(wchar_t));
292                        if (!tmp)
293                            goto alloc_fail;
294                        wcs = tmp;
295                    }
296                } else if (size != SIZE_l) {
297                    int l = wctomb(s ? s + i : tmp, c);
298                    if (l < 0)
299                        goto input_fail;
300                    i += l;
301                    if (alloc && i > k - 4) {
302                        k += k + 1;
303                        char* tmp = realloc(s, k);
304                        if (!tmp)
305                            goto alloc_fail;
306                        s = tmp;
307                    }
308                }
309                pos++;
310                width -= (width > 0);
311                gotmatch = 1;
312            }
313            if (width) {
314                ungetwc(c, f);
315                if (t == 'c' || !gotmatch)
316                    goto match_fail;
317            }
318
319            if (alloc) {
320                if (size == SIZE_l)
321                    *(wchar_t**)dest = wcs;
322                else
323                    *(char**)dest = s;
324            }
325            if (t != 'c') {
326                if (wcs)
327                    wcs[i] = 0;
328                if (s)
329                    s[i] = 0;
330            }
331            break;
332
333        case 'd':
334        case 'i':
335        case 'o':
336        case 'u':
337        case 'x':
338        case 'a':
339        case 'e':
340        case 'f':
341        case 'g':
342        case 'A':
343        case 'E':
344        case 'F':
345        case 'G':
346        case 'X':
347        case 'p':
348            if (width < 1)
349                width = 0;
350            snprintf(tmp, sizeof tmp, "%.*s%.0d%s%c%%lln", 1 + !dest, "%*", width,
351                     size_pfx[size + 2], t);
352            cnt = 0;
353            if (fscanf(f, tmp, dest ? dest : &cnt, &cnt) == -1)
354                goto input_fail;
355            else if (!cnt)
356                goto match_fail;
357            pos += cnt;
358            break;
359        default:
360            goto fmt_fail;
361        }
362
363        if (dest)
364            matches++;
365    }
366    if (0) {
367    fmt_fail:
368    alloc_fail:
369    input_fail:
370        if (!matches)
371            matches--;
372    match_fail:
373        if (alloc) {
374            free(s);
375            free(wcs);
376        }
377    }
378    FUNLOCK(f);
379    return matches;
380}
381