1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// 2// 3// This file is distributed under the University of Illinois Open Source 4// License. See LICENSE.TXT for details. 5// 6//===----------------------------------------------------------------------===// 7// 8// Scanf/printf implementation for use in *Sanitizer interceptors. 9// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html 10// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html 11// with a few common GNU extensions. 12// 13//===----------------------------------------------------------------------===// 14#include <stdarg.h> 15 16static const char *parse_number(const char *p, int *out) { 17 *out = internal_atoll(p); 18 while (*p >= '0' && *p <= '9') 19 ++p; 20 return p; 21} 22 23static const char *maybe_parse_param_index(const char *p, int *out) { 24 // n$ 25 if (*p >= '0' && *p <= '9') { 26 int number; 27 const char *q = parse_number(p, &number); 28 CHECK(q); 29 if (*q == '$') { 30 *out = number; 31 p = q + 1; 32 } 33 } 34 35 // Otherwise, do not change p. This will be re-parsed later as the field 36 // width. 37 return p; 38} 39 40static bool char_is_one_of(char c, const char *s) { 41 return !!internal_strchr(s, c); 42} 43 44static const char *maybe_parse_length_modifier(const char *p, char ll[2]) { 45 if (char_is_one_of(*p, "jztLq")) { 46 ll[0] = *p; 47 ++p; 48 } else if (*p == 'h') { 49 ll[0] = 'h'; 50 ++p; 51 if (*p == 'h') { 52 ll[1] = 'h'; 53 ++p; 54 } 55 } else if (*p == 'l') { 56 ll[0] = 'l'; 57 ++p; 58 if (*p == 'l') { 59 ll[1] = 'l'; 60 ++p; 61 } 62 } 63 return p; 64} 65 66// Returns true if the character is an integer conversion specifier. 67static bool format_is_integer_conv(char c) { 68 return char_is_one_of(c, "diouxXn"); 69} 70 71// Returns true if the character is an floating point conversion specifier. 72static bool format_is_float_conv(char c) { 73 return char_is_one_of(c, "aAeEfFgG"); 74} 75 76// Returns string output character size for string-like conversions, 77// or 0 if the conversion is invalid. 78static int format_get_char_size(char convSpecifier, 79 const char lengthModifier[2]) { 80 if (char_is_one_of(convSpecifier, "CS")) { 81 return sizeof(wchar_t); 82 } 83 84 if (char_is_one_of(convSpecifier, "cs[")) { 85 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') 86 return sizeof(wchar_t); 87 else if (lengthModifier[0] == '\0') 88 return sizeof(char); 89 } 90 91 return 0; 92} 93 94enum FormatStoreSize { 95 // Store size not known in advance; can be calculated as wcslen() of the 96 // destination buffer. 97 FSS_WCSLEN = -2, 98 // Store size not known in advance; can be calculated as strlen() of the 99 // destination buffer. 100 FSS_STRLEN = -1, 101 // Invalid conversion specifier. 102 FSS_INVALID = 0 103}; 104 105// Returns the memory size of a format directive (if >0), or a value of 106// FormatStoreSize. 107static int format_get_value_size(char convSpecifier, 108 const char lengthModifier[2], 109 bool promote_float) { 110 if (format_is_integer_conv(convSpecifier)) { 111 switch (lengthModifier[0]) { 112 case 'h': 113 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); 114 case 'l': 115 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); 116 case 'q': 117 return sizeof(long long); 118 case 'L': 119 return sizeof(long long); 120 case 'j': 121 return sizeof(INTMAX_T); 122 case 'z': 123 return sizeof(SIZE_T); 124 case 't': 125 return sizeof(PTRDIFF_T); 126 case 0: 127 return sizeof(int); 128 default: 129 return FSS_INVALID; 130 } 131 } 132 133 if (format_is_float_conv(convSpecifier)) { 134 switch (lengthModifier[0]) { 135 case 'L': 136 case 'q': 137 return sizeof(long double); 138 case 'l': 139 return lengthModifier[1] == 'l' ? sizeof(long double) 140 : sizeof(double); 141 case 0: 142 // Printf promotes floats to doubles but scanf does not 143 return promote_float ? sizeof(double) : sizeof(float); 144 default: 145 return FSS_INVALID; 146 } 147 } 148 149 if (convSpecifier == 'p') { 150 if (lengthModifier[0] != 0) 151 return FSS_INVALID; 152 return sizeof(void *); 153 } 154 155 return FSS_INVALID; 156} 157 158struct ScanfDirective { 159 int argIdx; // argument index, or -1 if not specified ("%n$") 160 int fieldWidth; 161 const char *begin; 162 const char *end; 163 bool suppressed; // suppress assignment ("*") 164 bool allocate; // allocate space ("m") 165 char lengthModifier[2]; 166 char convSpecifier; 167 bool maybeGnuMalloc; 168}; 169 170// Parse scanf format string. If a valid directive in encountered, it is 171// returned in dir. This function returns the pointer to the first 172// unprocessed character, or 0 in case of error. 173// In case of the end-of-string, a pointer to the closing \0 is returned. 174static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, 175 ScanfDirective *dir) { 176 internal_memset(dir, 0, sizeof(*dir)); 177 dir->argIdx = -1; 178 179 while (*p) { 180 if (*p != '%') { 181 ++p; 182 continue; 183 } 184 dir->begin = p; 185 ++p; 186 // %% 187 if (*p == '%') { 188 ++p; 189 continue; 190 } 191 if (*p == '\0') { 192 return 0; 193 } 194 // %n$ 195 p = maybe_parse_param_index(p, &dir->argIdx); 196 CHECK(p); 197 // * 198 if (*p == '*') { 199 dir->suppressed = true; 200 ++p; 201 } 202 // Field width 203 if (*p >= '0' && *p <= '9') { 204 p = parse_number(p, &dir->fieldWidth); 205 CHECK(p); 206 if (dir->fieldWidth <= 0) // Width if at all must be non-zero 207 return 0; 208 } 209 // m 210 if (*p == 'm') { 211 dir->allocate = true; 212 ++p; 213 } 214 // Length modifier. 215 p = maybe_parse_length_modifier(p, dir->lengthModifier); 216 // Conversion specifier. 217 dir->convSpecifier = *p++; 218 // Consume %[...] expression. 219 if (dir->convSpecifier == '[') { 220 if (*p == '^') 221 ++p; 222 if (*p == ']') 223 ++p; 224 while (*p && *p != ']') 225 ++p; 226 if (*p == 0) 227 return 0; // unexpected end of string 228 // Consume the closing ']'. 229 ++p; 230 } 231 // This is unfortunately ambiguous between old GNU extension 232 // of %as, %aS and %a[...] and newer POSIX %a followed by 233 // letters s, S or [. 234 if (allowGnuMalloc && dir->convSpecifier == 'a' && 235 !dir->lengthModifier[0]) { 236 if (*p == 's' || *p == 'S') { 237 dir->maybeGnuMalloc = true; 238 ++p; 239 } else if (*p == '[') { 240 // Watch for %a[h-j%d], if % appears in the 241 // [...] range, then we need to give up, we don't know 242 // if scanf will parse it as POSIX %a [h-j %d ] or 243 // GNU allocation of string with range dh-j plus %. 244 const char *q = p + 1; 245 if (*q == '^') 246 ++q; 247 if (*q == ']') 248 ++q; 249 while (*q && *q != ']' && *q != '%') 250 ++q; 251 if (*q == 0 || *q == '%') 252 return 0; 253 p = q + 1; // Consume the closing ']'. 254 dir->maybeGnuMalloc = true; 255 } 256 } 257 dir->end = p; 258 break; 259 } 260 return p; 261} 262 263static int scanf_get_value_size(ScanfDirective *dir) { 264 if (dir->allocate) { 265 if (!char_is_one_of(dir->convSpecifier, "cCsS[")) 266 return FSS_INVALID; 267 return sizeof(char *); 268 } 269 270 if (dir->maybeGnuMalloc) { 271 if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) 272 return FSS_INVALID; 273 // This is ambiguous, so check the smaller size of char * (if it is 274 // a GNU extension of %as, %aS or %a[...]) and float (if it is 275 // POSIX %a followed by s, S or [ letters). 276 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); 277 } 278 279 if (char_is_one_of(dir->convSpecifier, "cCsS[")) { 280 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS["); 281 unsigned charSize = 282 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 283 if (charSize == 0) 284 return FSS_INVALID; 285 if (dir->fieldWidth == 0) { 286 if (!needsTerminator) 287 return charSize; 288 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 289 } 290 return (dir->fieldWidth + needsTerminator) * charSize; 291 } 292 293 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false); 294} 295 296// Common part of *scanf interceptors. 297// Process format string and va_list, and report all store ranges. 298// Stops when "consuming" n_inputs input items. 299static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, 300 const char *format, va_list aq) { 301 CHECK_GT(n_inputs, 0); 302 const char *p = format; 303 304 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 305 306 while (*p) { 307 ScanfDirective dir; 308 p = scanf_parse_next(p, allowGnuMalloc, &dir); 309 if (!p) 310 break; 311 if (dir.convSpecifier == 0) { 312 // This can only happen at the end of the format string. 313 CHECK_EQ(*p, 0); 314 break; 315 } 316 // Here the directive is valid. Do what it says. 317 if (dir.argIdx != -1) { 318 // Unsupported. 319 break; 320 } 321 if (dir.suppressed) 322 continue; 323 int size = scanf_get_value_size(&dir); 324 if (size == FSS_INVALID) { 325 Report("WARNING: unexpected format specifier in scanf interceptor: " 326 "%.*s\n", dir.end - dir.begin, dir.begin); 327 break; 328 } 329 void *argp = va_arg(aq, void *); 330 if (dir.convSpecifier != 'n') 331 --n_inputs; 332 if (n_inputs < 0) 333 break; 334 if (size == FSS_STRLEN) { 335 size = internal_strlen((const char *)argp) + 1; 336 } else if (size == FSS_WCSLEN) { 337 // FIXME: actually use wcslen() to calculate it. 338 size = 0; 339 } 340 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 341 } 342} 343 344#if SANITIZER_INTERCEPT_PRINTF 345 346struct PrintfDirective { 347 int fieldWidth; 348 int fieldPrecision; 349 int argIdx; // width argument index, or -1 if not specified ("%*n$") 350 int precisionIdx; // precision argument index, or -1 if not specified (".*n$") 351 const char *begin; 352 const char *end; 353 bool starredWidth; 354 bool starredPrecision; 355 char lengthModifier[2]; 356 char convSpecifier; 357}; 358 359static const char *maybe_parse_number(const char *p, int *out) { 360 if (*p >= '0' && *p <= '9') 361 p = parse_number(p, out); 362 return p; 363} 364 365static const char *maybe_parse_number_or_star(const char *p, int *out, 366 bool *star) { 367 if (*p == '*') { 368 *star = true; 369 ++p; 370 } else { 371 *star = false; 372 p = maybe_parse_number(p, out); 373 } 374 return p; 375} 376 377// Parse printf format string. Same as scanf_parse_next. 378static const char *printf_parse_next(const char *p, PrintfDirective *dir) { 379 internal_memset(dir, 0, sizeof(*dir)); 380 dir->argIdx = -1; 381 dir->precisionIdx = -1; 382 383 while (*p) { 384 if (*p != '%') { 385 ++p; 386 continue; 387 } 388 dir->begin = p; 389 ++p; 390 // %% 391 if (*p == '%') { 392 ++p; 393 continue; 394 } 395 if (*p == '\0') { 396 return 0; 397 } 398 // %n$ 399 p = maybe_parse_param_index(p, &dir->precisionIdx); 400 CHECK(p); 401 // Flags 402 while (char_is_one_of(*p, "'-+ #0")) { 403 ++p; 404 } 405 // Field width 406 p = maybe_parse_number_or_star(p, &dir->fieldWidth, 407 &dir->starredWidth); 408 if (!p) 409 return 0; 410 // Precision 411 if (*p == '.') { 412 ++p; 413 // Actual precision is optional (surprise!) 414 p = maybe_parse_number_or_star(p, &dir->fieldPrecision, 415 &dir->starredPrecision); 416 if (!p) 417 return 0; 418 // m$ 419 if (dir->starredPrecision) { 420 p = maybe_parse_param_index(p, &dir->precisionIdx); 421 CHECK(p); 422 } 423 } 424 // Length modifier. 425 p = maybe_parse_length_modifier(p, dir->lengthModifier); 426 // Conversion specifier. 427 dir->convSpecifier = *p++; 428 dir->end = p; 429 break; 430 } 431 return p; 432} 433 434static int printf_get_value_size(PrintfDirective *dir) { 435 if (dir->convSpecifier == 'm') { 436 return sizeof(char *); 437 } 438 439 if (char_is_one_of(dir->convSpecifier, "cCsS")) { 440 unsigned charSize = 441 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 442 if (charSize == 0) 443 return FSS_INVALID; 444 if (char_is_one_of(dir->convSpecifier, "sS")) { 445 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 446 } 447 return charSize; 448 } 449 450 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true); 451} 452 453#define SKIP_SCALAR_ARG(aq, convSpecifier, size) \ 454 do { \ 455 if (format_is_float_conv(convSpecifier)) { \ 456 switch (size) { \ 457 case 8: \ 458 va_arg(*aq, double); \ 459 break; \ 460 case 12: \ 461 va_arg(*aq, long double); \ 462 break; \ 463 case 16: \ 464 va_arg(*aq, long double); \ 465 break; \ 466 default: \ 467 Report("WARNING: unexpected floating-point arg size" \ 468 " in printf interceptor: %d\n", size); \ 469 return; \ 470 } \ 471 } else { \ 472 switch (size) { \ 473 case 1: \ 474 case 2: \ 475 case 4: \ 476 va_arg(*aq, u32); \ 477 break; \ 478 case 8: \ 479 va_arg(*aq, u64); \ 480 break; \ 481 default: \ 482 Report("WARNING: unexpected arg size" \ 483 " in printf interceptor: %d\n", size); \ 484 return; \ 485 } \ 486 } \ 487 } while (0) 488 489// Common part of *printf interceptors. 490// Process format string and va_list, and report all load ranges. 491static void printf_common(void *ctx, const char *format, va_list aq) { 492 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 493 494 const char *p = format; 495 496 while (*p) { 497 PrintfDirective dir; 498 p = printf_parse_next(p, &dir); 499 if (!p) 500 break; 501 if (dir.convSpecifier == 0) { 502 // This can only happen at the end of the format string. 503 CHECK_EQ(*p, 0); 504 break; 505 } 506 // Here the directive is valid. Do what it says. 507 if (dir.argIdx != -1 || dir.precisionIdx != -1) { 508 // Unsupported. 509 break; 510 } 511 if (dir.starredWidth) { 512 // Dynamic width 513 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 514 } 515 if (dir.starredPrecision) { 516 // Dynamic precision 517 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 518 } 519 int size = printf_get_value_size(&dir); 520 if (size == FSS_INVALID) { 521 Report("WARNING: unexpected format specifier in printf " 522 "interceptor: %.*s\n", dir.end - dir.begin, dir.begin); 523 break; 524 } 525 if (dir.convSpecifier == 'n') { 526 void *argp = va_arg(aq, void *); 527 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 528 continue; 529 } else if (size == FSS_STRLEN) { 530 if (void *argp = va_arg(aq, void *)) { 531 if (dir.starredPrecision) { 532 // FIXME: properly support starred precision for strings. 533 size = 0; 534 } else if (dir.fieldPrecision > 0) { 535 // Won't read more than "precision" symbols. 536 size = internal_strnlen((const char *)argp, dir.fieldPrecision); 537 if (size < dir.fieldPrecision) size++; 538 } else { 539 // Whole string will be accessed. 540 size = internal_strlen((const char *)argp) + 1; 541 } 542 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 543 } 544 } else if (size == FSS_WCSLEN) { 545 if (void *argp = va_arg(aq, void *)) { 546 // FIXME: Properly support wide-character strings (via wcsrtombs). 547 size = 0; 548 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 549 } 550 } else { 551 // Skip non-pointer args 552 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); 553 } 554 } 555} 556 557#endif // SANITIZER_INTERCEPT_PRINTF 558