1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3// This file is distributed under the University of Illinois Open Source
4// License. See LICENSE.TXT for details.
5//
6//===----------------------------------------------------------------------===//
7//
8// Scanf/printf implementation for use in *Sanitizer interceptors.
9// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
10// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
11// with a few common GNU extensions.
12//
13//===----------------------------------------------------------------------===//
14#include <stdarg.h>
15
16static const char *parse_number(const char *p, int *out) {
17  *out = internal_atoll(p);
18  while (*p >= '0' && *p <= '9')
19    ++p;
20  return p;
21}
22
23static const char *maybe_parse_param_index(const char *p, int *out) {
24  // n$
25  if (*p >= '0' && *p <= '9') {
26    int number;
27    const char *q = parse_number(p, &number);
28    CHECK(q);
29    if (*q == '$') {
30      *out = number;
31      p = q + 1;
32    }
33  }
34
35  // Otherwise, do not change p. This will be re-parsed later as the field
36  // width.
37  return p;
38}
39
// Returns true if c is one of the characters of the NUL-terminated set s.
// The terminating NUL of s is never considered a member: callers probe the
// characters of a format string with this helper and must not mistake the end
// of that string for a flag, length modifier or conversion specifier (a
// strchr-style lookup would report a match for c == '\0').
static bool char_is_one_of(char c, const char *s) {
  for (; *s != '\0'; ++s) {
    if (*s == c)
      return true;
  }
  return false;
}
43
// Parses an optional length modifier at p: one of "j", "z", "t", "L", "q",
// "h", "hh", "l" or "ll".
//
// The modifier characters are written to ll[0] (and ll[1] for the doubled
// forms). ll is not touched otherwise, so callers are expected to pass it
// zero-initialized.
//
// Returns the position just past the modifier, or p itself when there is no
// modifier. The terminating NUL of the format string is never consumed, so
// the returned pointer always stays inside the string.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
  case 'j':
  case 'z':
  case 't':
  case 'L':
  case 'q':
    ll[0] = *p;
    return p + 1;
  case 'h':
  case 'l':
    ll[0] = *p;
    ++p;
    // "hh" / "ll": the second character has to repeat the first one.
    if (*p == ll[0]) {
      ll[1] = *p;
      ++p;
    }
    return p;
  default:
    // Not a length modifier (this includes the end of the string).
    return p;
  }
}
65
66// Returns true if the character is an integer conversion specifier.
67static bool format_is_integer_conv(char c) {
68  return char_is_one_of(c, "diouxXn");
69}
70
71// Returns true if the character is an floating point conversion specifier.
72static bool format_is_float_conv(char c) {
73  return char_is_one_of(c, "aAeEfFgG");
74}
75
76// Returns string output character size for string-like conversions,
77// or 0 if the conversion is invalid.
78static int format_get_char_size(char convSpecifier,
79                                const char lengthModifier[2]) {
80  if (char_is_one_of(convSpecifier, "CS")) {
81    return sizeof(wchar_t);
82  }
83
84  if (char_is_one_of(convSpecifier, "cs[")) {
85    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
86      return sizeof(wchar_t);
87    else if (lengthModifier[0] == '\0')
88      return sizeof(char);
89  }
90
91  return 0;
92}
93
// Special (non-positive) results of the *_get_value_size() helpers. A
// positive result of those helpers is a size in bytes instead.
enum FormatStoreSize {
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Invalid conversion specifier (or an invalid combination of conversion
  // specifier and length modifier).
  FSS_INVALID = 0
};
104
105// Returns the memory size of a format directive (if >0), or a value of
106// FormatStoreSize.
107static int format_get_value_size(char convSpecifier,
108                                 const char lengthModifier[2],
109                                 bool promote_float) {
110  if (format_is_integer_conv(convSpecifier)) {
111    switch (lengthModifier[0]) {
112    case 'h':
113      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
114    case 'l':
115      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
116    case 'q':
117      return sizeof(long long);
118    case 'L':
119      return sizeof(long long);
120    case 'j':
121      return sizeof(INTMAX_T);
122    case 'z':
123      return sizeof(SIZE_T);
124    case 't':
125      return sizeof(PTRDIFF_T);
126    case 0:
127      return sizeof(int);
128    default:
129      return FSS_INVALID;
130    }
131  }
132
133  if (format_is_float_conv(convSpecifier)) {
134    switch (lengthModifier[0]) {
135    case 'L':
136    case 'q':
137      return sizeof(long double);
138    case 'l':
139      return lengthModifier[1] == 'l' ? sizeof(long double)
140                                           : sizeof(double);
141    case 0:
142      // Printf promotes floats to doubles but scanf does not
143      return promote_float ? sizeof(double) : sizeof(float);
144    default:
145      return FSS_INVALID;
146    }
147  }
148
149  if (convSpecifier == 'p') {
150    if (lengthModifier[0] != 0)
151      return FSS_INVALID;
152    return sizeof(void *);
153  }
154
155  return FSS_INVALID;
156}
157
// One conversion directive of a scanf format string, as filled in by
// scanf_parse_next(). All fields start out zeroed except argIdx.
struct ScanfDirective {
  int argIdx; // argument index, or -1 if not specified ("%n$")
  // Maximum field width; stays 0 when the directive has no width.
  int fieldWidth;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the last character of the directive.
  const char *end;
  bool suppressed; // suppress assignment ("*")
  bool allocate;   // allocate space ("m")
  // Length modifier characters ("h", "hh", "l", "ll", "j", ...); unused
  // slots stay 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 when no directive was parsed.
  char convSpecifier;
  // Set when "%a" is directly followed by 's', 'S' or a "[...]" set, which
  // may be either the old GNU allocating extension or a POSIX "%a" followed
  // by literal text.
  bool maybeGnuMalloc;
};
169
170// Parse scanf format string. If a valid directive in encountered, it is
171// returned in dir. This function returns the pointer to the first
172// unprocessed character, or 0 in case of error.
173// In case of the end-of-string, a pointer to the closing \0 is returned.
174static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
175                                    ScanfDirective *dir) {
176  internal_memset(dir, 0, sizeof(*dir));
177  dir->argIdx = -1;
178
179  while (*p) {
180    if (*p != '%') {
181      ++p;
182      continue;
183    }
184    dir->begin = p;
185    ++p;
186    // %%
187    if (*p == '%') {
188      ++p;
189      continue;
190    }
191    if (*p == '\0') {
192      return 0;
193    }
194    // %n$
195    p = maybe_parse_param_index(p, &dir->argIdx);
196    CHECK(p);
197    // *
198    if (*p == '*') {
199      dir->suppressed = true;
200      ++p;
201    }
202    // Field width
203    if (*p >= '0' && *p <= '9') {
204      p = parse_number(p, &dir->fieldWidth);
205      CHECK(p);
206      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
207        return 0;
208    }
209    // m
210    if (*p == 'm') {
211      dir->allocate = true;
212      ++p;
213    }
214    // Length modifier.
215    p = maybe_parse_length_modifier(p, dir->lengthModifier);
216    // Conversion specifier.
217    dir->convSpecifier = *p++;
218    // Consume %[...] expression.
219    if (dir->convSpecifier == '[') {
220      if (*p == '^')
221        ++p;
222      if (*p == ']')
223        ++p;
224      while (*p && *p != ']')
225        ++p;
226      if (*p == 0)
227        return 0; // unexpected end of string
228                  // Consume the closing ']'.
229      ++p;
230    }
231    // This is unfortunately ambiguous between old GNU extension
232    // of %as, %aS and %a[...] and newer POSIX %a followed by
233    // letters s, S or [.
234    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
235        !dir->lengthModifier[0]) {
236      if (*p == 's' || *p == 'S') {
237        dir->maybeGnuMalloc = true;
238        ++p;
239      } else if (*p == '[') {
240        // Watch for %a[h-j%d], if % appears in the
241        // [...] range, then we need to give up, we don't know
242        // if scanf will parse it as POSIX %a [h-j %d ] or
243        // GNU allocation of string with range dh-j plus %.
244        const char *q = p + 1;
245        if (*q == '^')
246          ++q;
247        if (*q == ']')
248          ++q;
249        while (*q && *q != ']' && *q != '%')
250          ++q;
251        if (*q == 0 || *q == '%')
252          return 0;
253        p = q + 1; // Consume the closing ']'.
254        dir->maybeGnuMalloc = true;
255      }
256    }
257    dir->end = p;
258    break;
259  }
260  return p;
261}
262
263static int scanf_get_value_size(ScanfDirective *dir) {
264  if (dir->allocate) {
265    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
266      return FSS_INVALID;
267    return sizeof(char *);
268  }
269
270  if (dir->maybeGnuMalloc) {
271    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
272      return FSS_INVALID;
273    // This is ambiguous, so check the smaller size of char * (if it is
274    // a GNU extension of %as, %aS or %a[...]) and float (if it is
275    // POSIX %a followed by s, S or [ letters).
276    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
277  }
278
279  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
280    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
281    unsigned charSize =
282        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
283    if (charSize == 0)
284      return FSS_INVALID;
285    if (dir->fieldWidth == 0) {
286      if (!needsTerminator)
287        return charSize;
288      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
289    }
290    return (dir->fieldWidth + needsTerminator) * charSize;
291  }
292
293  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
294}
295
296// Common part of *scanf interceptors.
297// Process format string and va_list, and report all store ranges.
298// Stops when "consuming" n_inputs input items.
299static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
300                         const char *format, va_list aq) {
301  CHECK_GT(n_inputs, 0);
302  const char *p = format;
303
304  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
305
306  while (*p) {
307    ScanfDirective dir;
308    p = scanf_parse_next(p, allowGnuMalloc, &dir);
309    if (!p)
310      break;
311    if (dir.convSpecifier == 0) {
312      // This can only happen at the end of the format string.
313      CHECK_EQ(*p, 0);
314      break;
315    }
316    // Here the directive is valid. Do what it says.
317    if (dir.argIdx != -1) {
318      // Unsupported.
319      break;
320    }
321    if (dir.suppressed)
322      continue;
323    int size = scanf_get_value_size(&dir);
324    if (size == FSS_INVALID) {
325      Report("WARNING: unexpected format specifier in scanf interceptor: "
326        "%.*s\n", dir.end - dir.begin, dir.begin);
327      break;
328    }
329    void *argp = va_arg(aq, void *);
330    if (dir.convSpecifier != 'n')
331      --n_inputs;
332    if (n_inputs < 0)
333      break;
334    if (size == FSS_STRLEN) {
335      size = internal_strlen((const char *)argp) + 1;
336    } else if (size == FSS_WCSLEN) {
337      // FIXME: actually use wcslen() to calculate it.
338      size = 0;
339    }
340    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
341  }
342}
343
344#if SANITIZER_INTERCEPT_PRINTF
345
// One conversion directive of a printf format string, as filled in by
// printf_parse_next(). All fields start out zeroed except the two indices.
struct PrintfDirective {
  // Literal field width; stays 0 when absent or given as '*'.
  int fieldWidth;
  // Literal precision; stays 0 when absent or given as '*', so an explicit
  // ".0" cannot be told apart from "no precision".
  int fieldPrecision;
  // Positional argument index, or -1 if not specified. printf_common() gives
  // up on the rest of the format when this is not -1.
  int argIdx;
  // Positional index attached to a starred precision (".*n$"), or -1 if not
  // specified. printf_common() gives up on the rest of the format when this
  // is not -1.
  int precisionIdx;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the conversion specifier.
  const char *end;
  // The field width is given as '*', i.e. taken from the argument list.
  bool starredWidth;
  // The precision is given as ".*", i.e. taken from the argument list.
  bool starredPrecision;
  // Length modifier characters ("h", "hh", "l", "ll", "j", ...); unused
  // slots stay 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 when no directive was parsed.
  char convSpecifier;
};
358
359static const char *maybe_parse_number(const char *p, int *out) {
360  if (*p >= '0' && *p <= '9')
361    p = parse_number(p, out);
362  return p;
363}
364
365static const char *maybe_parse_number_or_star(const char *p, int *out,
366                                              bool *star) {
367  if (*p == '*') {
368    *star = true;
369    ++p;
370  } else {
371    *star = false;
372    p = maybe_parse_number(p, out);
373  }
374  return p;
375}
376
377// Parse printf format string. Same as scanf_parse_next.
378static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
379  internal_memset(dir, 0, sizeof(*dir));
380  dir->argIdx = -1;
381  dir->precisionIdx = -1;
382
383  while (*p) {
384    if (*p != '%') {
385      ++p;
386      continue;
387    }
388    dir->begin = p;
389    ++p;
390    // %%
391    if (*p == '%') {
392      ++p;
393      continue;
394    }
395    if (*p == '\0') {
396      return 0;
397    }
398    // %n$
399    p = maybe_parse_param_index(p, &dir->precisionIdx);
400    CHECK(p);
401    // Flags
402    while (char_is_one_of(*p, "'-+ #0")) {
403      ++p;
404    }
405    // Field width
406    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
407                                   &dir->starredWidth);
408    if (!p)
409      return 0;
410    // Precision
411    if (*p == '.') {
412      ++p;
413      // Actual precision is optional (surprise!)
414      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
415                                     &dir->starredPrecision);
416      if (!p)
417        return 0;
418      // m$
419      if (dir->starredPrecision) {
420        p = maybe_parse_param_index(p, &dir->precisionIdx);
421        CHECK(p);
422      }
423    }
424    // Length modifier.
425    p = maybe_parse_length_modifier(p, dir->lengthModifier);
426    // Conversion specifier.
427    dir->convSpecifier = *p++;
428    dir->end = p;
429    break;
430  }
431  return p;
432}
433
434static int printf_get_value_size(PrintfDirective *dir) {
435  if (dir->convSpecifier == 'm') {
436    return sizeof(char *);
437  }
438
439  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
440    unsigned charSize =
441        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
442    if (charSize == 0)
443      return FSS_INVALID;
444    if (char_is_one_of(dir->convSpecifier, "sS")) {
445      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
446    }
447    return charSize;
448  }
449
450  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
451}
452
// Advances the va_list that aq points to past one scalar (non-pointer)
// argument. The type fetched with va_arg is chosen from the conversion
// specifier and the size in bytes:
//   - floating-point conversions: double for 8, long double for 12 and 16;
//   - everything else: u32 for 1, 2 and 4 (small integers are passed
//     promoted), u64 for 8.
// NOTE: for any other size the macro prints a warning and executes
// `return;`, i.e. it leaves the *enclosing* function. It can therefore only
// be used inside functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (!format_is_float_conv(convSpecifier)) {                    \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        va_arg(*aq, u32);                                          \
        break;                                                     \
      case 8:                                                      \
        va_arg(*aq, u64);                                          \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %d\n", size);              \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 8:                                                      \
        va_arg(*aq, double);                                       \
        break;                                                     \
      case 12:                                                     \
      case 16:                                                     \
        va_arg(*aq, long double);                                  \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %d\n", size);              \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
488
489// Common part of *printf interceptors.
490// Process format string and va_list, and report all load ranges.
491static void printf_common(void *ctx, const char *format, va_list aq) {
492  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
493
494  const char *p = format;
495
496  while (*p) {
497    PrintfDirective dir;
498    p = printf_parse_next(p, &dir);
499    if (!p)
500      break;
501    if (dir.convSpecifier == 0) {
502      // This can only happen at the end of the format string.
503      CHECK_EQ(*p, 0);
504      break;
505    }
506    // Here the directive is valid. Do what it says.
507    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
508      // Unsupported.
509      break;
510    }
511    if (dir.starredWidth) {
512      // Dynamic width
513      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
514    }
515    if (dir.starredPrecision) {
516      // Dynamic precision
517      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
518    }
519    int size = printf_get_value_size(&dir);
520    if (size == FSS_INVALID) {
521      Report("WARNING: unexpected format specifier in printf "
522             "interceptor: %.*s\n", dir.end - dir.begin, dir.begin);
523      break;
524    }
525    if (dir.convSpecifier == 'n') {
526      void *argp = va_arg(aq, void *);
527      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
528      continue;
529    } else if (size == FSS_STRLEN) {
530      if (void *argp = va_arg(aq, void *)) {
531        if (dir.starredPrecision) {
532          // FIXME: properly support starred precision for strings.
533          size = 0;
534        } else if (dir.fieldPrecision > 0) {
535          // Won't read more than "precision" symbols.
536          size = internal_strnlen((const char *)argp, dir.fieldPrecision);
537          if (size < dir.fieldPrecision) size++;
538        } else {
539          // Whole string will be accessed.
540          size = internal_strlen((const char *)argp) + 1;
541        }
542        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
543      }
544    } else if (size == FSS_WCSLEN) {
545      if (void *argp = va_arg(aq, void *)) {
546        // FIXME: Properly support wide-character strings (via wcsrtombs).
547        size = 0;
548        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
549      }
550    } else {
551      // Skip non-pointer args
552      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
553    }
554  }
555}
556
557#endif  // SANITIZER_INTERCEPT_PRINTF
558