1/* Boost format strings. 2 Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2006. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22#include <stdbool.h> 23#include <stdlib.h> 24 25#include "format.h" 26#include "c-ctype.h" 27#include "xalloc.h" 28#include "xvasprintf.h" 29#include "format-invalid.h" 30#include "gettext.h" 31 32#define _(str) gettext (str) 33 34/* Boost format strings are described in 35 boost_1_33_1/libs/format/doc/format.html 36 and implemented in 37 boost_1_33_1/boost/format/parsing.hpp. 38 A directive (other than '%%') 39 - starts with '%' or '%|'; in the latter case it must end in '|', 40 - is continued either by 41 - 'm%' where m is a positive integer, starting with a nonzero digit; 42 in this case the directive must not have started with '%|'; or 43 - the following: 44 - optional: 'm$' where m is a positive integer, starting with a 45 nonzero digit, 46 - optional: any of the characters '#', '0', '-', ' ', '+', "'", 47 '_', '=', 'h', 'l', 48 - optional: a width specification: '*' (reads an argument) or '*m$' 49 or a nonempty digit sequence, 50 - optional: a '.' and a precision specification: '*' (reads an 51 argument) or '*m$' or a nonempty digit sequence, 52 - optional: any of the characters 'h', 'l', 'L', 53 - if the directive started with '%|': 54 an optional specifier and a final '|', 55 otherwise 56 a mandatory specifier. 57 If no specifier is given, it needs an argument of any type. 58 The possible specifiers are: 59 - 'c', 'C', that need a character argument, 60 - 's', 'S', that need an argument of any type, 61 - 'i', 'd', 'o', 'u', 'x', 'X', that need an integer argument, 62 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument, 63 - 'p', that needs a 'void *' argument, 64 - 't', that doesn't need an argument, 65 - 'TX', where X is any character, that doesn't need an argument, 66 - 'n', that needs a pointer to integer. 67 The Boost format string interpreter doesn't actually care about 68 the argument types, but we do, because it increases the likelihood 69 of detecting translator mistakes. 70 Numbered ('%m%' or '%m$' or '*m$') and unnumbered argument specifications 71 cannot be used in the same string. 72 */ 73 74enum format_arg_type 75{ 76 FAT_NONE = 0, 77 /* Basic types */ 78 FAT_INTEGER = 1, 79 FAT_DOUBLE = 2, 80 FAT_CHAR = 3, 81 FAT_POINTER = 4, 82 FAT_ANY = 5 83}; 84 85struct numbered_arg 86{ 87 unsigned int number; 88 enum format_arg_type type; 89}; 90 91struct spec 92{ 93 unsigned int directives; 94 unsigned int numbered_arg_count; 95 unsigned int allocated; 96 struct numbered_arg *numbered; 97}; 98 99/* Locale independent test for a decimal digit. 100 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 101 <ctype.h> isdigit must be an 'unsigned char'.) */ 102#undef isdigit 103#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 104 105 106static int 107numbered_arg_compare (const void *p1, const void *p2) 108{ 109 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 110 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 111 112 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 113} 114 115static void * 116format_parse (const char *format, bool translated, char *fdi, 117 char **invalid_reason) 118{ 119 const char *const format_start = format; 120 struct spec spec; 121 unsigned int unnumbered_arg_count; 122 struct spec *result; 123 124 spec.directives = 0; 125 spec.numbered_arg_count = 0; 126 spec.allocated = 0; 127 spec.numbered = NULL; 128 unnumbered_arg_count = 0; 129 130 for (; *format != '\0';) 131 if (*format++ == '%') 132 { 133 /* A directive. */ 134 FDI_SET (format - 1, FMTDIR_START); 135 spec.directives++; 136 137 if (*format == '%') 138 format++; 139 else 140 { 141 bool brackets = false; 142 bool done = false; 143 unsigned int number = 0; 144 enum format_arg_type type = FAT_NONE; 145 146 if (*format == '|') 147 { 148 format++; 149 brackets = true; 150 } 151 152 if (isdigit (*format) && *format != '0') 153 { 154 const char *f = format; 155 unsigned int m = 0; 156 157 do 158 { 159 m = 10 * m + (*f - '0'); 160 f++; 161 } 162 while (isdigit (*f)); 163 164 if ((!brackets && *f == '%') || *f == '$') 165 { 166 if (m == 0) /* can happen if m overflows */ 167 { 168 *invalid_reason = INVALID_ARGNO_0 (spec.directives); 169 FDI_SET (f, FMTDIR_ERROR); 170 goto bad_format; 171 } 172 number = m; 173 if (*f == '%') 174 { 175 type = FAT_ANY; 176 done = true; 177 } 178 format = ++f; 179 } 180 } 181 182 if (!done) 183 { 184 /* Parse flags. */ 185 for (;;) 186 { 187 if (*format == ' ' || *format == '+' || *format == '-' 188 || *format == '#' || *format == '0' || *format == '\'' 189 || *format == '_' || *format == '=' || *format == 'h' 190 || *format == 'l') 191 format++; 192 else 193 break; 194 } 195 196 /* Parse width. */ 197 if (*format == '*') 198 { 199 unsigned int width_number = 0; 200 201 format++; 202 203 if (isdigit (*format)) 204 { 205 const char *f = format; 206 unsigned int m = 0; 207 208 do 209 { 210 m = 10 * m + (*f - '0'); 211 f++; 212 } 213 while (isdigit (*f)); 214 215 if (*f == '$') 216 { 217 if (m == 0) 218 { 219 *invalid_reason = 220 INVALID_WIDTH_ARGNO_0 (spec.directives); 221 FDI_SET (f, FMTDIR_ERROR); 222 goto bad_format; 223 } 224 width_number = m; 225 format = ++f; 226 } 227 } 228 229 if (width_number) 230 { 231 /* Numbered argument. */ 232 233 /* Numbered and unnumbered specifications are 234 exclusive. */ 235 if (unnumbered_arg_count > 0) 236 { 237 *invalid_reason = 238 INVALID_MIXES_NUMBERED_UNNUMBERED (); 239 FDI_SET (format - 1, FMTDIR_ERROR); 240 goto bad_format; 241 } 242 243 if (spec.allocated == spec.numbered_arg_count) 244 { 245 spec.allocated = 2 * spec.allocated + 1; 246 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 247 } 248 spec.numbered[spec.numbered_arg_count].number = width_number; 249 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 250 spec.numbered_arg_count++; 251 } 252 else 253 { 254 /* Unnumbered argument. */ 255 256 /* Numbered and unnumbered specifications are 257 exclusive. */ 258 if (spec.numbered_arg_count > 0) 259 { 260 *invalid_reason = 261 INVALID_MIXES_NUMBERED_UNNUMBERED (); 262 FDI_SET (format - 1, FMTDIR_ERROR); 263 goto bad_format; 264 } 265 266 if (spec.allocated == unnumbered_arg_count) 267 { 268 spec.allocated = 2 * spec.allocated + 1; 269 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 270 } 271 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 272 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; 273 unnumbered_arg_count++; 274 } 275 } 276 else if (isdigit (*format)) 277 { 278 do format++; while (isdigit (*format)); 279 } 280 281 /* Parse precision. */ 282 if (*format == '.') 283 { 284 format++; 285 286 if (*format == '*') 287 { 288 unsigned int precision_number = 0; 289 290 format++; 291 292 if (isdigit (*format)) 293 { 294 const char *f = format; 295 unsigned int m = 0; 296 297 do 298 { 299 m = 10 * m + (*f - '0'); 300 f++; 301 } 302 while (isdigit (*f)); 303 304 if (*f == '$') 305 { 306 if (m == 0) 307 { 308 *invalid_reason = 309 INVALID_PRECISION_ARGNO_0 (spec.directives); 310 FDI_SET (f, FMTDIR_ERROR); 311 goto bad_format; 312 } 313 precision_number = m; 314 format = ++f; 315 } 316 } 317 318 if (precision_number) 319 { 320 /* Numbered argument. */ 321 322 /* Numbered and unnumbered specifications are 323 exclusive. */ 324 if (unnumbered_arg_count > 0) 325 { 326 *invalid_reason = 327 INVALID_MIXES_NUMBERED_UNNUMBERED (); 328 FDI_SET (format - 1, FMTDIR_ERROR); 329 goto bad_format; 330 } 331 332 if (spec.allocated == spec.numbered_arg_count) 333 { 334 spec.allocated = 2 * spec.allocated + 1; 335 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 336 } 337 spec.numbered[spec.numbered_arg_count].number = precision_number; 338 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 339 spec.numbered_arg_count++; 340 } 341 else 342 { 343 /* Unnumbered argument. */ 344 345 /* Numbered and unnumbered specifications are 346 exclusive. */ 347 if (spec.numbered_arg_count > 0) 348 { 349 *invalid_reason = 350 INVALID_MIXES_NUMBERED_UNNUMBERED (); 351 FDI_SET (format - 1, FMTDIR_ERROR); 352 goto bad_format; 353 } 354 355 if (spec.allocated == unnumbered_arg_count) 356 { 357 spec.allocated = 2 * spec.allocated + 1; 358 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 359 } 360 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 361 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; 362 unnumbered_arg_count++; 363 } 364 } 365 else if (isdigit (*format)) 366 { 367 do format++; while (isdigit (*format)); 368 } 369 } 370 371 /* Parse size. */ 372 for (;;) 373 { 374 if (*format == 'h' || *format == 'l' || *format == 'L') 375 format++; 376 else 377 break; 378 } 379 380 switch (*format++) 381 { 382 case 'c': case 'C': 383 type = FAT_CHAR; 384 break; 385 case 's': case 'S': 386 type = FAT_ANY; 387 break; 388 case 'i': case 'd': case 'o': case 'u': case 'x': case 'X': 389 type = FAT_INTEGER; 390 break; 391 case 'e': case 'E': case 'f': case 'g': case 'G': 392 type = FAT_DOUBLE; 393 break; 394 case 'p': 395 type = FAT_POINTER; 396 break; 397 case 't': 398 type = FAT_NONE; 399 break; 400 case 'T': 401 if (*format == '\0') 402 { 403 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 404 FDI_SET (format - 1, FMTDIR_ERROR); 405 goto bad_format; 406 } 407 format++; 408 type = FAT_NONE; 409 break; 410 case 'n': 411 type = FAT_NONE; 412 break; 413 case '|': 414 if (brackets) 415 { 416 --format; 417 type = FAT_ANY; 418 break; 419 } 420 /*FALLTHROUGH*/ 421 default: 422 --format; 423 if (*format == '\0') 424 { 425 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 426 FDI_SET (format - 1, FMTDIR_ERROR); 427 } 428 else 429 { 430 *invalid_reason = 431 INVALID_CONVERSION_SPECIFIER (spec.directives, 432 *format); 433 FDI_SET (format, FMTDIR_ERROR); 434 } 435 goto bad_format; 436 } 437 if (brackets) 438 { 439 if (*format != '|') 440 { 441 if (*format == '\0') 442 { 443 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 444 FDI_SET (format - 1, FMTDIR_ERROR); 445 } 446 else 447 { 448 *invalid_reason = 449 xasprintf (_("The directive number %u starts with | but does not end with |."), 450 spec.directives); 451 FDI_SET (format, FMTDIR_ERROR); 452 } 453 goto bad_format; 454 } 455 format++; 456 } 457 } 458 459 if (type != FAT_NONE) 460 { 461 if (number) 462 { 463 /* Numbered argument. */ 464 465 /* Numbered and unnumbered specifications are exclusive. */ 466 if (unnumbered_arg_count > 0) 467 { 468 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 469 FDI_SET (format - 1, FMTDIR_ERROR); 470 goto bad_format; 471 } 472 473 if (spec.allocated == spec.numbered_arg_count) 474 { 475 spec.allocated = 2 * spec.allocated + 1; 476 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 477 } 478 spec.numbered[spec.numbered_arg_count].number = number; 479 spec.numbered[spec.numbered_arg_count].type = type; 480 spec.numbered_arg_count++; 481 } 482 else 483 { 484 /* Unnumbered argument. */ 485 486 /* Numbered and unnumbered specifications are exclusive. */ 487 if (spec.numbered_arg_count > 0) 488 { 489 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 490 FDI_SET (format - 1, FMTDIR_ERROR); 491 goto bad_format; 492 } 493 494 if (spec.allocated == unnumbered_arg_count) 495 { 496 spec.allocated = 2 * spec.allocated + 1; 497 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 498 } 499 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 500 spec.numbered[unnumbered_arg_count].type = type; 501 unnumbered_arg_count++; 502 } 503 } 504 } 505 506 FDI_SET (format - 1, FMTDIR_END); 507 } 508 509 /* Convert the unnumbered argument array to numbered arguments. */ 510 if (unnumbered_arg_count > 0) 511 spec.numbered_arg_count = unnumbered_arg_count; 512 /* Sort the numbered argument array, and eliminate duplicates. */ 513 else if (spec.numbered_arg_count > 1) 514 { 515 unsigned int i, j; 516 bool err; 517 518 qsort (spec.numbered, spec.numbered_arg_count, 519 sizeof (struct numbered_arg), numbered_arg_compare); 520 521 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 522 err = false; 523 for (i = j = 0; i < spec.numbered_arg_count; i++) 524 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) 525 { 526 enum format_arg_type type1 = spec.numbered[i].type; 527 enum format_arg_type type2 = spec.numbered[j-1].type; 528 enum format_arg_type type_both; 529 530 if (type1 == type2 || type2 == FAT_ANY) 531 type_both = type1; 532 else if (type1 == FAT_ANY) 533 type_both = type2; 534 else 535 { 536 /* Incompatible types. */ 537 type_both = FAT_NONE; 538 if (!err) 539 *invalid_reason = 540 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number); 541 err = true; 542 } 543 544 spec.numbered[j-1].type = type_both; 545 } 546 else 547 { 548 if (j < i) 549 { 550 spec.numbered[j].number = spec.numbered[i].number; 551 spec.numbered[j].type = spec.numbered[i].type; 552 } 553 j++; 554 } 555 spec.numbered_arg_count = j; 556 if (err) 557 /* *invalid_reason has already been set above. */ 558 goto bad_format; 559 } 560 561 result = XMALLOC (struct spec); 562 *result = spec; 563 return result; 564 565 bad_format: 566 if (spec.numbered != NULL) 567 free (spec.numbered); 568 return NULL; 569} 570 571static void 572format_free (void *descr) 573{ 574 struct spec *spec = (struct spec *) descr; 575 576 if (spec->numbered != NULL) 577 free (spec->numbered); 578 free (spec); 579} 580 581static int 582format_get_number_of_directives (void *descr) 583{ 584 struct spec *spec = (struct spec *) descr; 585 586 return spec->directives; 587} 588 589static bool 590format_check (void *msgid_descr, void *msgstr_descr, bool equality, 591 formatstring_error_logger_t error_logger, 592 const char *pretty_msgstr) 593{ 594 struct spec *spec1 = (struct spec *) msgid_descr; 595 struct spec *spec2 = (struct spec *) msgstr_descr; 596 bool err = false; 597 598 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 599 { 600 unsigned int i, j; 601 unsigned int n1 = spec1->numbered_arg_count; 602 unsigned int n2 = spec2->numbered_arg_count; 603 604 /* Check the argument names are the same. 605 Both arrays are sorted. We search for the first difference. */ 606 for (i = 0, j = 0; i < n1 || j < n2; ) 607 { 608 int cmp = (i >= n1 ? 1 : 609 j >= n2 ? -1 : 610 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 611 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 612 0); 613 614 if (cmp > 0) 615 { 616 if (error_logger) 617 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 618 spec2->numbered[j].number, pretty_msgstr); 619 err = true; 620 break; 621 } 622 else if (cmp < 0) 623 { 624 if (equality) 625 { 626 if (error_logger) 627 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 628 spec1->numbered[i].number, pretty_msgstr); 629 err = true; 630 break; 631 } 632 else 633 i++; 634 } 635 else 636 j++, i++; 637 } 638 /* Check the argument types are the same. */ 639 if (!err) 640 for (i = 0, j = 0; j < n2; ) 641 { 642 if (spec1->numbered[i].number == spec2->numbered[j].number) 643 { 644 if (spec1->numbered[i].type != spec2->numbered[j].type) 645 { 646 if (error_logger) 647 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 648 pretty_msgstr, spec2->numbered[j].number); 649 err = true; 650 break; 651 } 652 j++, i++; 653 } 654 else 655 i++; 656 } 657 } 658 659 return err; 660} 661 662 663struct formatstring_parser formatstring_boost = 664{ 665 format_parse, 666 format_free, 667 format_get_number_of_directives, 668 NULL, 669 format_check 670}; 671 672 673#ifdef TEST 674 675/* Test program: Print the argument list specification returned by 676 format_parse for strings read from standard input. */ 677 678#include <stdio.h> 679 680static void 681format_print (void *descr) 682{ 683 struct spec *spec = (struct spec *) descr; 684 unsigned int last; 685 unsigned int i; 686 687 if (spec == NULL) 688 { 689 printf ("INVALID"); 690 return; 691 } 692 693 printf ("("); 694 last = 1; 695 for (i = 0; i < spec->numbered_arg_count; i++) 696 { 697 unsigned int number = spec->numbered[i].number; 698 699 if (i > 0) 700 printf (" "); 701 if (number < last) 702 abort (); 703 for (; last < number; last++) 704 printf ("_ "); 705 switch (spec->numbered[i].type) 706 { 707 case FAT_INTEGER: 708 printf ("i"); 709 break; 710 case FAT_DOUBLE: 711 printf ("f"); 712 break; 713 case FAT_CHAR: 714 printf ("c"); 715 break; 716 case FAT_POINTER: 717 printf ("p"); 718 break; 719 case FAT_ANY: 720 printf ("*"); 721 break; 722 default: 723 abort (); 724 } 725 last = number + 1; 726 } 727 printf (")"); 728} 729 730int 731main () 732{ 733 for (;;) 734 { 735 char *line = NULL; 736 size_t line_size = 0; 737 int line_len; 738 char *invalid_reason; 739 void *descr; 740 741 line_len = getline (&line, &line_size, stdin); 742 if (line_len < 0) 743 break; 744 if (line_len > 0 && line[line_len - 1] == '\n') 745 line[--line_len] = '\0'; 746 747 invalid_reason = NULL; 748 descr = format_parse (line, false, NULL, &invalid_reason); 749 750 format_print (descr); 751 printf ("\n"); 752 if (descr == NULL) 753 printf ("%s\n", invalid_reason); 754 755 free (invalid_reason); 756 free (line); 757 } 758 759 return 0; 760} 761 762/* 763 * For Emacs M-x compile 764 * Local Variables: 765 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-boost.c ../gnulib-lib/libgettextlib.la" 766 * End: 767 */ 768 769#endif /* TEST */ 770 771