1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3// This file is distributed under the University of Illinois Open Source
4// License. See LICENSE.TXT for details.
5//
6//===----------------------------------------------------------------------===//
7//
8// Scanf/printf implementation for use in *Sanitizer interceptors.
9// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
10// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
11// with a few common GNU extensions.
12//
13//===----------------------------------------------------------------------===//
14
15#include <stdarg.h>
16
17static const char *parse_number(const char *p, int *out) {
18  *out = internal_atoll(p);
19  while (*p >= '0' && *p <= '9')
20    ++p;
21  return p;
22}
23
24static const char *maybe_parse_param_index(const char *p, int *out) {
25  // n$
26  if (*p >= '0' && *p <= '9') {
27    int number;
28    const char *q = parse_number(p, &number);
29    CHECK(q);
30    if (*q == '$') {
31      *out = number;
32      p = q + 1;
33    }
34  }
35
36  // Otherwise, do not change p. This will be re-parsed later as the field
37  // width.
38  return p;
39}
40
41static bool char_is_one_of(char c, const char *s) {
42  return !!internal_strchr(s, c);
43}
44
// Consumes an optional length modifier at p.
//   p  - position in the format string (may point at the terminating '\0').
//   ll - two-character output. ll[0] receives the first modifier character
//        and ll[1] the second one for the doubled forms "hh" and "ll".
//        Elements that are not written keep their previous value, so the
//        caller is expected to pass a zero-filled array.
// Recognized modifiers: j, z, t, L, q, h, hh, l, ll.
// Returns the position after the modifier, or p itself when there is none.
// The terminating '\0' is never treated as a modifier, so the returned
// pointer never moves past the end of the string.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
  case 'j':
  case 'z':
  case 't':
  case 'L':
  case 'q':
    // Single-character modifiers.
    ll[0] = *p++;
    break;
  case 'h':
  case 'l': {
    // 'h' and 'l' may be doubled ("hh", "ll"); mixed pairs such as "hl" are
    // not combined, only the first character is consumed.
    const char first = *p++;
    ll[0] = first;
    if (*p == first) {
      ll[1] = first;
      ++p;
    }
    break;
  }
  default:
    // No length modifier (this includes the end of the string).
    break;
  }
  return p;
}
66
67// Returns true if the character is an integer conversion specifier.
68static bool format_is_integer_conv(char c) {
69  return char_is_one_of(c, "diouxXn");
70}
71
72// Returns true if the character is an floating point conversion specifier.
73static bool format_is_float_conv(char c) {
74  return char_is_one_of(c, "aAeEfFgG");
75}
76
77// Returns string output character size for string-like conversions,
78// or 0 if the conversion is invalid.
79static int format_get_char_size(char convSpecifier,
80                                const char lengthModifier[2]) {
81  if (char_is_one_of(convSpecifier, "CS")) {
82    return sizeof(wchar_t);
83  }
84
85  if (char_is_one_of(convSpecifier, "cs[")) {
86    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
87      return sizeof(wchar_t);
88    else if (lengthModifier[0] == '\0')
89      return sizeof(char);
90  }
91
92  return 0;
93}
94
// Special (non-positive) results of the *_get_value_size() helpers. A
// positive result is a size in bytes; these values cover the cases where no
// fixed size can be returned.
enum FormatStoreSize {
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Invalid conversion specifier.
  FSS_INVALID = 0
};
105
106// Returns the memory size of a format directive (if >0), or a value of
107// FormatStoreSize.
108static int format_get_value_size(char convSpecifier,
109                                 const char lengthModifier[2],
110                                 bool promote_float) {
111  if (format_is_integer_conv(convSpecifier)) {
112    switch (lengthModifier[0]) {
113    case 'h':
114      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
115    case 'l':
116      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
117    case 'q':
118      return sizeof(long long);
119    case 'L':
120      return sizeof(long long);
121    case 'j':
122      return sizeof(INTMAX_T);
123    case 'z':
124      return sizeof(SIZE_T);
125    case 't':
126      return sizeof(PTRDIFF_T);
127    case 0:
128      return sizeof(int);
129    default:
130      return FSS_INVALID;
131    }
132  }
133
134  if (format_is_float_conv(convSpecifier)) {
135    switch (lengthModifier[0]) {
136    case 'L':
137    case 'q':
138      return sizeof(long double);
139    case 'l':
140      return lengthModifier[1] == 'l' ? sizeof(long double)
141                                           : sizeof(double);
142    case 0:
143      // Printf promotes floats to doubles but scanf does not
144      return promote_float ? sizeof(double) : sizeof(float);
145    default:
146      return FSS_INVALID;
147    }
148  }
149
150  if (convSpecifier == 'p') {
151    if (lengthModifier[0] != 0)
152      return FSS_INVALID;
153    return sizeof(void *);
154  }
155
156  return FSS_INVALID;
157}
158
// One parsed scanf conversion directive, as filled in by scanf_parse_next().
// The parser zero-fills the whole structure before setting individual fields.
struct ScanfDirective {
  int argIdx; // argument index, or -1 if not specified ("%n$")
  int fieldWidth; // maximum field width; 0 when the directive has none
  const char *begin; // points at the '%' that starts the directive
  const char *end; // points just past the last character of the directive
  bool suppressed; // suppress assignment ("*")
  bool allocate;   // allocate space ("m")
  char lengthModifier[2]; // length modifier characters, zero-padded
  char convSpecifier; // conversion character; 0 when no directive was found
  bool maybeGnuMalloc; // "%a" followed by s, S or [...]: possibly the old GNU
                       // allocation extension rather than POSIX %a
};
170
171// Parse scanf format string. If a valid directive in encountered, it is
172// returned in dir. This function returns the pointer to the first
173// unprocessed character, or 0 in case of error.
174// In case of the end-of-string, a pointer to the closing \0 is returned.
175static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
176                                    ScanfDirective *dir) {
177  internal_memset(dir, 0, sizeof(*dir));
178  dir->argIdx = -1;
179
180  while (*p) {
181    if (*p != '%') {
182      ++p;
183      continue;
184    }
185    dir->begin = p;
186    ++p;
187    // %%
188    if (*p == '%') {
189      ++p;
190      continue;
191    }
192    if (*p == '\0') {
193      return nullptr;
194    }
195    // %n$
196    p = maybe_parse_param_index(p, &dir->argIdx);
197    CHECK(p);
198    // *
199    if (*p == '*') {
200      dir->suppressed = true;
201      ++p;
202    }
203    // Field width
204    if (*p >= '0' && *p <= '9') {
205      p = parse_number(p, &dir->fieldWidth);
206      CHECK(p);
207      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
208        return nullptr;
209    }
210    // m
211    if (*p == 'm') {
212      dir->allocate = true;
213      ++p;
214    }
215    // Length modifier.
216    p = maybe_parse_length_modifier(p, dir->lengthModifier);
217    // Conversion specifier.
218    dir->convSpecifier = *p++;
219    // Consume %[...] expression.
220    if (dir->convSpecifier == '[') {
221      if (*p == '^')
222        ++p;
223      if (*p == ']')
224        ++p;
225      while (*p && *p != ']')
226        ++p;
227      if (*p == 0)
228        return nullptr; // unexpected end of string
229                        // Consume the closing ']'.
230      ++p;
231    }
232    // This is unfortunately ambiguous between old GNU extension
233    // of %as, %aS and %a[...] and newer POSIX %a followed by
234    // letters s, S or [.
235    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
236        !dir->lengthModifier[0]) {
237      if (*p == 's' || *p == 'S') {
238        dir->maybeGnuMalloc = true;
239        ++p;
240      } else if (*p == '[') {
241        // Watch for %a[h-j%d], if % appears in the
242        // [...] range, then we need to give up, we don't know
243        // if scanf will parse it as POSIX %a [h-j %d ] or
244        // GNU allocation of string with range dh-j plus %.
245        const char *q = p + 1;
246        if (*q == '^')
247          ++q;
248        if (*q == ']')
249          ++q;
250        while (*q && *q != ']' && *q != '%')
251          ++q;
252        if (*q == 0 || *q == '%')
253          return nullptr;
254        p = q + 1; // Consume the closing ']'.
255        dir->maybeGnuMalloc = true;
256      }
257    }
258    dir->end = p;
259    break;
260  }
261  return p;
262}
263
264static int scanf_get_value_size(ScanfDirective *dir) {
265  if (dir->allocate) {
266    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
267      return FSS_INVALID;
268    return sizeof(char *);
269  }
270
271  if (dir->maybeGnuMalloc) {
272    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
273      return FSS_INVALID;
274    // This is ambiguous, so check the smaller size of char * (if it is
275    // a GNU extension of %as, %aS or %a[...]) and float (if it is
276    // POSIX %a followed by s, S or [ letters).
277    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
278  }
279
280  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
281    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
282    unsigned charSize =
283        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
284    if (charSize == 0)
285      return FSS_INVALID;
286    if (dir->fieldWidth == 0) {
287      if (!needsTerminator)
288        return charSize;
289      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
290    }
291    return (dir->fieldWidth + needsTerminator) * charSize;
292  }
293
294  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
295}
296
297// Common part of *scanf interceptors.
298// Process format string and va_list, and report all store ranges.
299// Stops when "consuming" n_inputs input items.
300static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
301                         const char *format, va_list aq) {
302  CHECK_GT(n_inputs, 0);
303  const char *p = format;
304
305  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
306
307  while (*p) {
308    ScanfDirective dir;
309    p = scanf_parse_next(p, allowGnuMalloc, &dir);
310    if (!p)
311      break;
312    if (dir.convSpecifier == 0) {
313      // This can only happen at the end of the format string.
314      CHECK_EQ(*p, 0);
315      break;
316    }
317    // Here the directive is valid. Do what it says.
318    if (dir.argIdx != -1) {
319      // Unsupported.
320      break;
321    }
322    if (dir.suppressed)
323      continue;
324    int size = scanf_get_value_size(&dir);
325    if (size == FSS_INVALID) {
326      Report("%s: WARNING: unexpected format specifier in scanf interceptor: ",
327             SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin);
328      break;
329    }
330    void *argp = va_arg(aq, void *);
331    if (dir.convSpecifier != 'n')
332      --n_inputs;
333    if (n_inputs < 0)
334      break;
335    if (size == FSS_STRLEN) {
336      size = internal_strlen((const char *)argp) + 1;
337    } else if (size == FSS_WCSLEN) {
338      // FIXME: actually use wcslen() to calculate it.
339      size = 0;
340    }
341    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
342  }
343}
344
345#if SANITIZER_INTERCEPT_PRINTF
346
// One parsed printf conversion directive, as filled in by printf_parse_next().
// The parser zero-fills the whole structure and presets both index fields to
// -1 before parsing.
// NOTE(review): printf_common() treats any index field other than -1 as
// "positional arguments in use" and stops processing; confirm against
// printf_parse_next() which positional form populates which index field.
struct PrintfDirective {
  int fieldWidth; // explicit numeric field width; 0 when absent or starred
  int fieldPrecision; // explicit numeric precision; 0 when absent or starred
  int argIdx; // width argument index, or -1 if not specified ("%*n$")
  int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
  const char *begin; // points at the '%' that starts the directive
  const char *end; // points just past the conversion specifier
  bool starredWidth; // width is given as '*' (taken from the arguments)
  bool starredPrecision; // precision is given as ".*" (taken from the arguments)
  char lengthModifier[2]; // length modifier characters, zero-padded
  char convSpecifier; // conversion character; 0 when no directive was found
};
359
360static const char *maybe_parse_number(const char *p, int *out) {
361  if (*p >= '0' && *p <= '9')
362    p = parse_number(p, out);
363  return p;
364}
365
366static const char *maybe_parse_number_or_star(const char *p, int *out,
367                                              bool *star) {
368  if (*p == '*') {
369    *star = true;
370    ++p;
371  } else {
372    *star = false;
373    p = maybe_parse_number(p, out);
374  }
375  return p;
376}
377
378// Parse printf format string. Same as scanf_parse_next.
379static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
380  internal_memset(dir, 0, sizeof(*dir));
381  dir->argIdx = -1;
382  dir->precisionIdx = -1;
383
384  while (*p) {
385    if (*p != '%') {
386      ++p;
387      continue;
388    }
389    dir->begin = p;
390    ++p;
391    // %%
392    if (*p == '%') {
393      ++p;
394      continue;
395    }
396    if (*p == '\0') {
397      return nullptr;
398    }
399    // %n$
400    p = maybe_parse_param_index(p, &dir->precisionIdx);
401    CHECK(p);
402    // Flags
403    while (char_is_one_of(*p, "'-+ #0")) {
404      ++p;
405    }
406    // Field width
407    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
408                                   &dir->starredWidth);
409    if (!p)
410      return nullptr;
411    // Precision
412    if (*p == '.') {
413      ++p;
414      // Actual precision is optional (surprise!)
415      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
416                                     &dir->starredPrecision);
417      if (!p)
418        return nullptr;
419      // m$
420      if (dir->starredPrecision) {
421        p = maybe_parse_param_index(p, &dir->precisionIdx);
422        CHECK(p);
423      }
424    }
425    // Length modifier.
426    p = maybe_parse_length_modifier(p, dir->lengthModifier);
427    // Conversion specifier.
428    dir->convSpecifier = *p++;
429    dir->end = p;
430    break;
431  }
432  return p;
433}
434
435static int printf_get_value_size(PrintfDirective *dir) {
436  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
437    unsigned charSize =
438        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
439    if (charSize == 0)
440      return FSS_INVALID;
441    if (char_is_one_of(dir->convSpecifier, "sS")) {
442      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
443    }
444    return charSize;
445  }
446
447  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
448}
449
// Advances a va_list past one scalar argument.
//   aq            - pointer to the va_list to advance.
//   convSpecifier - conversion character; decides between the floating-point
//                   and the integer path.
//   size          - size in bytes of the argument.
// Floating-point arguments of size 8 are fetched as double, sizes 12 and 16
// as long double. Integer arguments of size 1, 2 or 4 are fetched as u32,
// size 8 as u64. Any other size is reported and the macro then executes
// `return;`, so it can only be used inside a function returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (!format_is_float_conv(convSpecifier)) {                    \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        va_arg(*aq, u32);                                          \
        break;                                                     \
      case 8:                                                      \
        va_arg(*aq, u64);                                          \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %d\n", size);              \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 8:                                                      \
        va_arg(*aq, double);                                       \
        break;                                                     \
      case 12:                                                     \
      case 16:                                                     \
        va_arg(*aq, long double);                                  \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %d\n", size);              \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
485
486// Common part of *printf interceptors.
487// Process format string and va_list, and report all load ranges.
488static void printf_common(void *ctx, const char *format, va_list aq) {
489  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
490
491  const char *p = format;
492
493  while (*p) {
494    PrintfDirective dir;
495    p = printf_parse_next(p, &dir);
496    if (!p)
497      break;
498    if (dir.convSpecifier == 0) {
499      // This can only happen at the end of the format string.
500      CHECK_EQ(*p, 0);
501      break;
502    }
503    // Here the directive is valid. Do what it says.
504    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
505      // Unsupported.
506      break;
507    }
508    if (dir.starredWidth) {
509      // Dynamic width
510      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
511    }
512    if (dir.starredPrecision) {
513      // Dynamic precision
514      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
515    }
516    // %m does not require an argument: strlen(errno).
517    if (dir.convSpecifier == 'm')
518      continue;
519    int size = printf_get_value_size(&dir);
520    if (size == FSS_INVALID) {
521      static int ReportedOnce;
522      if (!ReportedOnce++)
523        Report(
524            "%s: WARNING: unexpected format specifier in printf "
525            "interceptor: %.*s (reported once per process)\n",
526            SanitizerToolName, dir.end - dir.begin, dir.begin);
527      break;
528    }
529    if (dir.convSpecifier == 'n') {
530      void *argp = va_arg(aq, void *);
531      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
532      continue;
533    } else if (size == FSS_STRLEN) {
534      if (void *argp = va_arg(aq, void *)) {
535        if (dir.starredPrecision) {
536          // FIXME: properly support starred precision for strings.
537          size = 0;
538        } else if (dir.fieldPrecision > 0) {
539          // Won't read more than "precision" symbols.
540          size = internal_strnlen((const char *)argp, dir.fieldPrecision);
541          if (size < dir.fieldPrecision) size++;
542        } else {
543          // Whole string will be accessed.
544          size = internal_strlen((const char *)argp) + 1;
545        }
546        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
547      }
548    } else if (size == FSS_WCSLEN) {
549      if (void *argp = va_arg(aq, void *)) {
550        // FIXME: Properly support wide-character strings (via wcsrtombs).
551        size = 0;
552        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
553      }
554    } else {
555      // Skip non-pointer args
556      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
557    }
558  }
559}
560
561#endif // SANITIZER_INTERCEPT_PRINTF
562