1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// 2// 3// This file is distributed under the University of Illinois Open Source 4// License. See LICENSE.TXT for details. 5// 6//===----------------------------------------------------------------------===// 7// 8// Scanf/printf implementation for use in *Sanitizer interceptors. 9// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html 10// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html 11// with a few common GNU extensions. 12// 13//===----------------------------------------------------------------------===// 14 15#include <stdarg.h> 16 17static const char *parse_number(const char *p, int *out) { 18 *out = internal_atoll(p); 19 while (*p >= '0' && *p <= '9') 20 ++p; 21 return p; 22} 23 24static const char *maybe_parse_param_index(const char *p, int *out) { 25 // n$ 26 if (*p >= '0' && *p <= '9') { 27 int number; 28 const char *q = parse_number(p, &number); 29 CHECK(q); 30 if (*q == '$') { 31 *out = number; 32 p = q + 1; 33 } 34 } 35 36 // Otherwise, do not change p. This will be re-parsed later as the field 37 // width. 38 return p; 39} 40 41static bool char_is_one_of(char c, const char *s) { 42 return !!internal_strchr(s, c); 43} 44 45static const char *maybe_parse_length_modifier(const char *p, char ll[2]) { 46 if (char_is_one_of(*p, "jztLq")) { 47 ll[0] = *p; 48 ++p; 49 } else if (*p == 'h') { 50 ll[0] = 'h'; 51 ++p; 52 if (*p == 'h') { 53 ll[1] = 'h'; 54 ++p; 55 } 56 } else if (*p == 'l') { 57 ll[0] = 'l'; 58 ++p; 59 if (*p == 'l') { 60 ll[1] = 'l'; 61 ++p; 62 } 63 } 64 return p; 65} 66 67// Returns true if the character is an integer conversion specifier. 68static bool format_is_integer_conv(char c) { 69 return char_is_one_of(c, "diouxXn"); 70} 71 72// Returns true if the character is an floating point conversion specifier. 73static bool format_is_float_conv(char c) { 74 return char_is_one_of(c, "aAeEfFgG"); 75} 76 77// Returns string output character size for string-like conversions, 78// or 0 if the conversion is invalid. 79static int format_get_char_size(char convSpecifier, 80 const char lengthModifier[2]) { 81 if (char_is_one_of(convSpecifier, "CS")) { 82 return sizeof(wchar_t); 83 } 84 85 if (char_is_one_of(convSpecifier, "cs[")) { 86 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') 87 return sizeof(wchar_t); 88 else if (lengthModifier[0] == '\0') 89 return sizeof(char); 90 } 91 92 return 0; 93} 94 95enum FormatStoreSize { 96 // Store size not known in advance; can be calculated as wcslen() of the 97 // destination buffer. 98 FSS_WCSLEN = -2, 99 // Store size not known in advance; can be calculated as strlen() of the 100 // destination buffer. 101 FSS_STRLEN = -1, 102 // Invalid conversion specifier. 103 FSS_INVALID = 0 104}; 105 106// Returns the memory size of a format directive (if >0), or a value of 107// FormatStoreSize. 108static int format_get_value_size(char convSpecifier, 109 const char lengthModifier[2], 110 bool promote_float) { 111 if (format_is_integer_conv(convSpecifier)) { 112 switch (lengthModifier[0]) { 113 case 'h': 114 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); 115 case 'l': 116 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); 117 case 'q': 118 return sizeof(long long); 119 case 'L': 120 return sizeof(long long); 121 case 'j': 122 return sizeof(INTMAX_T); 123 case 'z': 124 return sizeof(SIZE_T); 125 case 't': 126 return sizeof(PTRDIFF_T); 127 case 0: 128 return sizeof(int); 129 default: 130 return FSS_INVALID; 131 } 132 } 133 134 if (format_is_float_conv(convSpecifier)) { 135 switch (lengthModifier[0]) { 136 case 'L': 137 case 'q': 138 return sizeof(long double); 139 case 'l': 140 return lengthModifier[1] == 'l' ? sizeof(long double) 141 : sizeof(double); 142 case 0: 143 // Printf promotes floats to doubles but scanf does not 144 return promote_float ? sizeof(double) : sizeof(float); 145 default: 146 return FSS_INVALID; 147 } 148 } 149 150 if (convSpecifier == 'p') { 151 if (lengthModifier[0] != 0) 152 return FSS_INVALID; 153 return sizeof(void *); 154 } 155 156 return FSS_INVALID; 157} 158 159struct ScanfDirective { 160 int argIdx; // argument index, or -1 if not specified ("%n$") 161 int fieldWidth; 162 const char *begin; 163 const char *end; 164 bool suppressed; // suppress assignment ("*") 165 bool allocate; // allocate space ("m") 166 char lengthModifier[2]; 167 char convSpecifier; 168 bool maybeGnuMalloc; 169}; 170 171// Parse scanf format string. If a valid directive in encountered, it is 172// returned in dir. This function returns the pointer to the first 173// unprocessed character, or 0 in case of error. 174// In case of the end-of-string, a pointer to the closing \0 is returned. 175static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, 176 ScanfDirective *dir) { 177 internal_memset(dir, 0, sizeof(*dir)); 178 dir->argIdx = -1; 179 180 while (*p) { 181 if (*p != '%') { 182 ++p; 183 continue; 184 } 185 dir->begin = p; 186 ++p; 187 // %% 188 if (*p == '%') { 189 ++p; 190 continue; 191 } 192 if (*p == '\0') { 193 return nullptr; 194 } 195 // %n$ 196 p = maybe_parse_param_index(p, &dir->argIdx); 197 CHECK(p); 198 // * 199 if (*p == '*') { 200 dir->suppressed = true; 201 ++p; 202 } 203 // Field width 204 if (*p >= '0' && *p <= '9') { 205 p = parse_number(p, &dir->fieldWidth); 206 CHECK(p); 207 if (dir->fieldWidth <= 0) // Width if at all must be non-zero 208 return nullptr; 209 } 210 // m 211 if (*p == 'm') { 212 dir->allocate = true; 213 ++p; 214 } 215 // Length modifier. 216 p = maybe_parse_length_modifier(p, dir->lengthModifier); 217 // Conversion specifier. 218 dir->convSpecifier = *p++; 219 // Consume %[...] expression. 220 if (dir->convSpecifier == '[') { 221 if (*p == '^') 222 ++p; 223 if (*p == ']') 224 ++p; 225 while (*p && *p != ']') 226 ++p; 227 if (*p == 0) 228 return nullptr; // unexpected end of string 229 // Consume the closing ']'. 230 ++p; 231 } 232 // This is unfortunately ambiguous between old GNU extension 233 // of %as, %aS and %a[...] and newer POSIX %a followed by 234 // letters s, S or [. 235 if (allowGnuMalloc && dir->convSpecifier == 'a' && 236 !dir->lengthModifier[0]) { 237 if (*p == 's' || *p == 'S') { 238 dir->maybeGnuMalloc = true; 239 ++p; 240 } else if (*p == '[') { 241 // Watch for %a[h-j%d], if % appears in the 242 // [...] range, then we need to give up, we don't know 243 // if scanf will parse it as POSIX %a [h-j %d ] or 244 // GNU allocation of string with range dh-j plus %. 245 const char *q = p + 1; 246 if (*q == '^') 247 ++q; 248 if (*q == ']') 249 ++q; 250 while (*q && *q != ']' && *q != '%') 251 ++q; 252 if (*q == 0 || *q == '%') 253 return nullptr; 254 p = q + 1; // Consume the closing ']'. 255 dir->maybeGnuMalloc = true; 256 } 257 } 258 dir->end = p; 259 break; 260 } 261 return p; 262} 263 264static int scanf_get_value_size(ScanfDirective *dir) { 265 if (dir->allocate) { 266 if (!char_is_one_of(dir->convSpecifier, "cCsS[")) 267 return FSS_INVALID; 268 return sizeof(char *); 269 } 270 271 if (dir->maybeGnuMalloc) { 272 if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) 273 return FSS_INVALID; 274 // This is ambiguous, so check the smaller size of char * (if it is 275 // a GNU extension of %as, %aS or %a[...]) and float (if it is 276 // POSIX %a followed by s, S or [ letters). 277 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); 278 } 279 280 if (char_is_one_of(dir->convSpecifier, "cCsS[")) { 281 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS["); 282 unsigned charSize = 283 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 284 if (charSize == 0) 285 return FSS_INVALID; 286 if (dir->fieldWidth == 0) { 287 if (!needsTerminator) 288 return charSize; 289 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 290 } 291 return (dir->fieldWidth + needsTerminator) * charSize; 292 } 293 294 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false); 295} 296 297// Common part of *scanf interceptors. 298// Process format string and va_list, and report all store ranges. 299// Stops when "consuming" n_inputs input items. 300static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, 301 const char *format, va_list aq) { 302 CHECK_GT(n_inputs, 0); 303 const char *p = format; 304 305 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 306 307 while (*p) { 308 ScanfDirective dir; 309 p = scanf_parse_next(p, allowGnuMalloc, &dir); 310 if (!p) 311 break; 312 if (dir.convSpecifier == 0) { 313 // This can only happen at the end of the format string. 314 CHECK_EQ(*p, 0); 315 break; 316 } 317 // Here the directive is valid. Do what it says. 318 if (dir.argIdx != -1) { 319 // Unsupported. 320 break; 321 } 322 if (dir.suppressed) 323 continue; 324 int size = scanf_get_value_size(&dir); 325 if (size == FSS_INVALID) { 326 Report("%s: WARNING: unexpected format specifier in scanf interceptor: ", 327 SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin); 328 break; 329 } 330 void *argp = va_arg(aq, void *); 331 if (dir.convSpecifier != 'n') 332 --n_inputs; 333 if (n_inputs < 0) 334 break; 335 if (size == FSS_STRLEN) { 336 size = internal_strlen((const char *)argp) + 1; 337 } else if (size == FSS_WCSLEN) { 338 // FIXME: actually use wcslen() to calculate it. 339 size = 0; 340 } 341 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 342 } 343} 344 345#if SANITIZER_INTERCEPT_PRINTF 346 347struct PrintfDirective { 348 int fieldWidth; 349 int fieldPrecision; 350 int argIdx; // width argument index, or -1 if not specified ("%*n$") 351 int precisionIdx; // precision argument index, or -1 if not specified (".*n$") 352 const char *begin; 353 const char *end; 354 bool starredWidth; 355 bool starredPrecision; 356 char lengthModifier[2]; 357 char convSpecifier; 358}; 359 360static const char *maybe_parse_number(const char *p, int *out) { 361 if (*p >= '0' && *p <= '9') 362 p = parse_number(p, out); 363 return p; 364} 365 366static const char *maybe_parse_number_or_star(const char *p, int *out, 367 bool *star) { 368 if (*p == '*') { 369 *star = true; 370 ++p; 371 } else { 372 *star = false; 373 p = maybe_parse_number(p, out); 374 } 375 return p; 376} 377 378// Parse printf format string. Same as scanf_parse_next. 379static const char *printf_parse_next(const char *p, PrintfDirective *dir) { 380 internal_memset(dir, 0, sizeof(*dir)); 381 dir->argIdx = -1; 382 dir->precisionIdx = -1; 383 384 while (*p) { 385 if (*p != '%') { 386 ++p; 387 continue; 388 } 389 dir->begin = p; 390 ++p; 391 // %% 392 if (*p == '%') { 393 ++p; 394 continue; 395 } 396 if (*p == '\0') { 397 return nullptr; 398 } 399 // %n$ 400 p = maybe_parse_param_index(p, &dir->precisionIdx); 401 CHECK(p); 402 // Flags 403 while (char_is_one_of(*p, "'-+ #0")) { 404 ++p; 405 } 406 // Field width 407 p = maybe_parse_number_or_star(p, &dir->fieldWidth, 408 &dir->starredWidth); 409 if (!p) 410 return nullptr; 411 // Precision 412 if (*p == '.') { 413 ++p; 414 // Actual precision is optional (surprise!) 415 p = maybe_parse_number_or_star(p, &dir->fieldPrecision, 416 &dir->starredPrecision); 417 if (!p) 418 return nullptr; 419 // m$ 420 if (dir->starredPrecision) { 421 p = maybe_parse_param_index(p, &dir->precisionIdx); 422 CHECK(p); 423 } 424 } 425 // Length modifier. 426 p = maybe_parse_length_modifier(p, dir->lengthModifier); 427 // Conversion specifier. 428 dir->convSpecifier = *p++; 429 dir->end = p; 430 break; 431 } 432 return p; 433} 434 435static int printf_get_value_size(PrintfDirective *dir) { 436 if (char_is_one_of(dir->convSpecifier, "cCsS")) { 437 unsigned charSize = 438 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 439 if (charSize == 0) 440 return FSS_INVALID; 441 if (char_is_one_of(dir->convSpecifier, "sS")) { 442 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 443 } 444 return charSize; 445 } 446 447 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true); 448} 449 450#define SKIP_SCALAR_ARG(aq, convSpecifier, size) \ 451 do { \ 452 if (format_is_float_conv(convSpecifier)) { \ 453 switch (size) { \ 454 case 8: \ 455 va_arg(*aq, double); \ 456 break; \ 457 case 12: \ 458 va_arg(*aq, long double); \ 459 break; \ 460 case 16: \ 461 va_arg(*aq, long double); \ 462 break; \ 463 default: \ 464 Report("WARNING: unexpected floating-point arg size" \ 465 " in printf interceptor: %d\n", size); \ 466 return; \ 467 } \ 468 } else { \ 469 switch (size) { \ 470 case 1: \ 471 case 2: \ 472 case 4: \ 473 va_arg(*aq, u32); \ 474 break; \ 475 case 8: \ 476 va_arg(*aq, u64); \ 477 break; \ 478 default: \ 479 Report("WARNING: unexpected arg size" \ 480 " in printf interceptor: %d\n", size); \ 481 return; \ 482 } \ 483 } \ 484 } while (0) 485 486// Common part of *printf interceptors. 487// Process format string and va_list, and report all load ranges. 488static void printf_common(void *ctx, const char *format, va_list aq) { 489 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 490 491 const char *p = format; 492 493 while (*p) { 494 PrintfDirective dir; 495 p = printf_parse_next(p, &dir); 496 if (!p) 497 break; 498 if (dir.convSpecifier == 0) { 499 // This can only happen at the end of the format string. 500 CHECK_EQ(*p, 0); 501 break; 502 } 503 // Here the directive is valid. Do what it says. 504 if (dir.argIdx != -1 || dir.precisionIdx != -1) { 505 // Unsupported. 506 break; 507 } 508 if (dir.starredWidth) { 509 // Dynamic width 510 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 511 } 512 if (dir.starredPrecision) { 513 // Dynamic precision 514 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 515 } 516 // %m does not require an argument: strlen(errno). 517 if (dir.convSpecifier == 'm') 518 continue; 519 int size = printf_get_value_size(&dir); 520 if (size == FSS_INVALID) { 521 static int ReportedOnce; 522 if (!ReportedOnce++) 523 Report( 524 "%s: WARNING: unexpected format specifier in printf " 525 "interceptor: %.*s (reported once per process)\n", 526 SanitizerToolName, dir.end - dir.begin, dir.begin); 527 break; 528 } 529 if (dir.convSpecifier == 'n') { 530 void *argp = va_arg(aq, void *); 531 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 532 continue; 533 } else if (size == FSS_STRLEN) { 534 if (void *argp = va_arg(aq, void *)) { 535 if (dir.starredPrecision) { 536 // FIXME: properly support starred precision for strings. 537 size = 0; 538 } else if (dir.fieldPrecision > 0) { 539 // Won't read more than "precision" symbols. 540 size = internal_strnlen((const char *)argp, dir.fieldPrecision); 541 if (size < dir.fieldPrecision) size++; 542 } else { 543 // Whole string will be accessed. 544 size = internal_strlen((const char *)argp) + 1; 545 } 546 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 547 } 548 } else if (size == FSS_WCSLEN) { 549 if (void *argp = va_arg(aq, void *)) { 550 // FIXME: Properly support wide-character strings (via wcsrtombs). 551 size = 0; 552 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 553 } 554 } else { 555 // Skip non-pointer args 556 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); 557 } 558 } 559} 560 561#endif // SANITIZER_INTERCEPT_PRINTF 562