1/* Boost format strings. 2 Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2006. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19#ifdef HAVE_CONFIG_H 20# include <config.h> 21#endif 22 23#include <stdbool.h> 24#include <stdlib.h> 25 26#include "format.h" 27#include "c-ctype.h" 28#include "xalloc.h" 29#include "xvasprintf.h" 30#include "format-invalid.h" 31#include "gettext.h" 32 33#define _(str) gettext (str) 34 35/* Boost format strings are described in 36 boost_1_33_1/libs/format/doc/format.html 37 and implemented in 38 boost_1_33_1/boost/format/parsing.hpp. 39 A directive (other than '%%') 40 - starts with '%' or '%|'; in the latter case it must end in '|', 41 - is continued either by 42 - 'm%' where m is a positive integer, starting with a nonzero digit; 43 in this case the directive must not have started with '%|'; or 44 - the following: 45 - optional: 'm$' where m is a positive integer, starting with a 46 nonzero digit, 47 - optional: any of the characters '#', '0', '-', ' ', '+', "'", 48 '_', '=', 'h', 'l', 49 - optional: a width specification: '*' (reads an argument) or '*m$' 50 or a nonempty digit sequence, 51 - optional: a '.' and a precision specification: '*' (reads an 52 argument) or '*m$' or a nonempty digit sequence, 53 - optional: any of the characters 'h', 'l', 'L', 54 - if the directive started with '%|': 55 an optional specifier and a final '|', 56 otherwise 57 a mandatory specifier. 58 If no specifier is given, it needs an argument of any type. 59 The possible specifiers are: 60 - 'c', 'C', that need a character argument, 61 - 's', 'S', that need an argument of any type, 62 - 'i', 'd', 'o', 'u', 'x', 'X', that need an integer argument, 63 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument, 64 - 'p', that needs a 'void *' argument, 65 - 't', that doesn't need an argument, 66 - 'TX', where X is any character, that doesn't need an argument, 67 - 'n', that needs a pointer to integer. 68 The Boost format string interpreter doesn't actually care about 69 the argument types, but we do, because it increases the likelihood 70 of detecting translator mistakes. 71 Numbered ('%m%' or '%m$' or '*m$') and unnumbered argument specifications 72 cannot be used in the same string. 73 */ 74 75enum format_arg_type 76{ 77 FAT_NONE = 0, 78 /* Basic types */ 79 FAT_INTEGER = 1, 80 FAT_DOUBLE = 2, 81 FAT_CHAR = 3, 82 FAT_POINTER = 4, 83 FAT_ANY = 5 84}; 85 86struct numbered_arg 87{ 88 unsigned int number; 89 enum format_arg_type type; 90}; 91 92struct spec 93{ 94 unsigned int directives; 95 unsigned int numbered_arg_count; 96 unsigned int allocated; 97 struct numbered_arg *numbered; 98}; 99 100/* Locale independent test for a decimal digit. 101 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 102 <ctype.h> isdigit must be an 'unsigned char'.) */ 103#undef isdigit 104#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 105 106 107static int 108numbered_arg_compare (const void *p1, const void *p2) 109{ 110 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 111 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 112 113 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 114} 115 116static void * 117format_parse (const char *format, bool translated, char **invalid_reason) 118{ 119 struct spec spec; 120 unsigned int unnumbered_arg_count; 121 struct spec *result; 122 123 spec.directives = 0; 124 spec.numbered_arg_count = 0; 125 spec.allocated = 0; 126 spec.numbered = NULL; 127 unnumbered_arg_count = 0; 128 129 for (; *format != '\0';) 130 if (*format++ == '%') 131 { 132 /* A directive. */ 133 spec.directives++; 134 135 if (*format == '%') 136 format++; 137 else 138 { 139 bool brackets = false; 140 bool done = false; 141 unsigned int number = 0; 142 enum format_arg_type type = FAT_NONE; 143 144 if (*format == '|') 145 { 146 format++; 147 brackets = true; 148 } 149 150 if (isdigit (*format) && *format != '0') 151 { 152 const char *f = format; 153 unsigned int m = 0; 154 155 do 156 { 157 m = 10 * m + (*f - '0'); 158 f++; 159 } 160 while (isdigit (*f)); 161 162 if ((!brackets && *f == '%') || *f == '$') 163 { 164 if (m == 0) /* can happen if m overflows */ 165 { 166 *invalid_reason = INVALID_ARGNO_0 (spec.directives); 167 goto bad_format; 168 } 169 number = m; 170 if (*f == '%') 171 { 172 type = FAT_ANY; 173 done = true; 174 } 175 format = ++f; 176 } 177 } 178 179 if (!done) 180 { 181 /* Parse flags. */ 182 for (;;) 183 { 184 if (*format == ' ' || *format == '+' || *format == '-' 185 || *format == '#' || *format == '0' || *format == '\'' 186 || *format == '_' || *format == '=' || *format == 'h' 187 || *format == 'l') 188 format++; 189 else 190 break; 191 } 192 193 /* Parse width. */ 194 if (*format == '*') 195 { 196 unsigned int width_number = 0; 197 198 format++; 199 200 if (isdigit (*format)) 201 { 202 const char *f = format; 203 unsigned int m = 0; 204 205 do 206 { 207 m = 10 * m + (*f - '0'); 208 f++; 209 } 210 while (isdigit (*f)); 211 212 if (*f == '$') 213 { 214 if (m == 0) 215 { 216 *invalid_reason = 217 INVALID_WIDTH_ARGNO_0 (spec.directives); 218 goto bad_format; 219 } 220 width_number = m; 221 format = ++f; 222 } 223 } 224 225 if (width_number) 226 { 227 /* Numbered argument. */ 228 229 /* Numbered and unnumbered specifications are 230 exclusive. */ 231 if (unnumbered_arg_count > 0) 232 { 233 *invalid_reason = 234 INVALID_MIXES_NUMBERED_UNNUMBERED (); 235 goto bad_format; 236 } 237 238 if (spec.allocated == spec.numbered_arg_count) 239 { 240 spec.allocated = 2 * spec.allocated + 1; 241 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 242 } 243 spec.numbered[spec.numbered_arg_count].number = width_number; 244 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 245 spec.numbered_arg_count++; 246 } 247 else 248 { 249 /* Unnumbered argument. */ 250 251 /* Numbered and unnumbered specifications are 252 exclusive. */ 253 if (spec.numbered_arg_count > 0) 254 { 255 *invalid_reason = 256 INVALID_MIXES_NUMBERED_UNNUMBERED (); 257 goto bad_format; 258 } 259 260 if (spec.allocated == unnumbered_arg_count) 261 { 262 spec.allocated = 2 * spec.allocated + 1; 263 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 264 } 265 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 266 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; 267 unnumbered_arg_count++; 268 } 269 } 270 else if (isdigit (*format)) 271 { 272 do format++; while (isdigit (*format)); 273 } 274 275 /* Parse precision. */ 276 if (*format == '.') 277 { 278 format++; 279 280 if (*format == '*') 281 { 282 unsigned int precision_number = 0; 283 284 format++; 285 286 if (isdigit (*format)) 287 { 288 const char *f = format; 289 unsigned int m = 0; 290 291 do 292 { 293 m = 10 * m + (*f - '0'); 294 f++; 295 } 296 while (isdigit (*f)); 297 298 if (*f == '$') 299 { 300 if (m == 0) 301 { 302 *invalid_reason = 303 INVALID_PRECISION_ARGNO_0 (spec.directives); 304 goto bad_format; 305 } 306 precision_number = m; 307 format = ++f; 308 } 309 } 310 311 if (precision_number) 312 { 313 /* Numbered argument. */ 314 315 /* Numbered and unnumbered specifications are 316 exclusive. */ 317 if (unnumbered_arg_count > 0) 318 { 319 *invalid_reason = 320 INVALID_MIXES_NUMBERED_UNNUMBERED (); 321 goto bad_format; 322 } 323 324 if (spec.allocated == spec.numbered_arg_count) 325 { 326 spec.allocated = 2 * spec.allocated + 1; 327 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 328 } 329 spec.numbered[spec.numbered_arg_count].number = precision_number; 330 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 331 spec.numbered_arg_count++; 332 } 333 else 334 { 335 /* Unnumbered argument. */ 336 337 /* Numbered and unnumbered specifications are 338 exclusive. */ 339 if (spec.numbered_arg_count > 0) 340 { 341 *invalid_reason = 342 INVALID_MIXES_NUMBERED_UNNUMBERED (); 343 goto bad_format; 344 } 345 346 if (spec.allocated == unnumbered_arg_count) 347 { 348 spec.allocated = 2 * spec.allocated + 1; 349 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 350 } 351 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 352 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; 353 unnumbered_arg_count++; 354 } 355 } 356 else if (isdigit (*format)) 357 { 358 do format++; while (isdigit (*format)); 359 } 360 } 361 362 /* Parse size. */ 363 for (;;) 364 { 365 if (*format == 'h' || *format == 'l' || *format == 'L') 366 format++; 367 else 368 break; 369 } 370 371 switch (*format++) 372 { 373 case 'c': case 'C': 374 type = FAT_CHAR; 375 break; 376 case 's': case 'S': 377 type = FAT_ANY; 378 break; 379 case 'i': case 'd': case 'o': case 'u': case 'x': case 'X': 380 type = FAT_INTEGER; 381 break; 382 case 'e': case 'E': case 'f': case 'g': case 'G': 383 type = FAT_DOUBLE; 384 break; 385 case 'p': 386 type = FAT_POINTER; 387 break; 388 case 't': 389 type = FAT_NONE; 390 break; 391 case 'T': 392 if (*format == '\0') 393 { 394 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 395 goto bad_format; 396 } 397 format++; 398 type = FAT_NONE; 399 break; 400 case 'n': 401 type = FAT_NONE; 402 break; 403 case '|': 404 if (brackets) 405 { 406 --format; 407 type = FAT_ANY; 408 break; 409 } 410 /*FALLTHROUGH*/ 411 default: 412 --format; 413 *invalid_reason = 414 (*format == '\0' 415 ? INVALID_UNTERMINATED_DIRECTIVE () 416 : INVALID_CONVERSION_SPECIFIER (spec.directives, 417 *format)); 418 goto bad_format; 419 } 420 if (brackets) 421 { 422 if (*format != '|') 423 { 424 *invalid_reason = 425 (*format == '\0' 426 ? INVALID_UNTERMINATED_DIRECTIVE () 427 : xasprintf (_("The directive number %u starts with | but does not end with |."), 428 spec.directives)); 429 goto bad_format; 430 } 431 format++; 432 } 433 } 434 435 if (type != FAT_NONE) 436 { 437 if (number) 438 { 439 /* Numbered argument. */ 440 441 /* Numbered and unnumbered specifications are exclusive. */ 442 if (unnumbered_arg_count > 0) 443 { 444 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 445 goto bad_format; 446 } 447 448 if (spec.allocated == spec.numbered_arg_count) 449 { 450 spec.allocated = 2 * spec.allocated + 1; 451 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 452 } 453 spec.numbered[spec.numbered_arg_count].number = number; 454 spec.numbered[spec.numbered_arg_count].type = type; 455 spec.numbered_arg_count++; 456 } 457 else 458 { 459 /* Unnumbered argument. */ 460 461 /* Numbered and unnumbered specifications are exclusive. */ 462 if (spec.numbered_arg_count > 0) 463 { 464 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 465 goto bad_format; 466 } 467 468 if (spec.allocated == unnumbered_arg_count) 469 { 470 spec.allocated = 2 * spec.allocated + 1; 471 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 472 } 473 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 474 spec.numbered[unnumbered_arg_count].type = type; 475 unnumbered_arg_count++; 476 } 477 } 478 } 479 } 480 481 /* Convert the unnumbered argument array to numbered arguments. */ 482 if (unnumbered_arg_count > 0) 483 spec.numbered_arg_count = unnumbered_arg_count; 484 /* Sort the numbered argument array, and eliminate duplicates. */ 485 else if (spec.numbered_arg_count > 1) 486 { 487 unsigned int i, j; 488 bool err; 489 490 qsort (spec.numbered, spec.numbered_arg_count, 491 sizeof (struct numbered_arg), numbered_arg_compare); 492 493 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 494 err = false; 495 for (i = j = 0; i < spec.numbered_arg_count; i++) 496 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) 497 { 498 enum format_arg_type type1 = spec.numbered[i].type; 499 enum format_arg_type type2 = spec.numbered[j-1].type; 500 enum format_arg_type type_both; 501 502 if (type1 == type2 || type2 == FAT_ANY) 503 type_both = type1; 504 else if (type1 == FAT_ANY) 505 type_both = type2; 506 else 507 { 508 /* Incompatible types. */ 509 type_both = FAT_NONE; 510 if (!err) 511 *invalid_reason = 512 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number); 513 err = true; 514 } 515 516 spec.numbered[j-1].type = type_both; 517 } 518 else 519 { 520 if (j < i) 521 { 522 spec.numbered[j].number = spec.numbered[i].number; 523 spec.numbered[j].type = spec.numbered[i].type; 524 } 525 j++; 526 } 527 spec.numbered_arg_count = j; 528 if (err) 529 /* *invalid_reason has already been set above. */ 530 goto bad_format; 531 } 532 533 result = (struct spec *) xmalloc (sizeof (struct spec)); 534 *result = spec; 535 return result; 536 537 bad_format: 538 if (spec.numbered != NULL) 539 free (spec.numbered); 540 return NULL; 541} 542 543static void 544format_free (void *descr) 545{ 546 struct spec *spec = (struct spec *) descr; 547 548 if (spec->numbered != NULL) 549 free (spec->numbered); 550 free (spec); 551} 552 553static int 554format_get_number_of_directives (void *descr) 555{ 556 struct spec *spec = (struct spec *) descr; 557 558 return spec->directives; 559} 560 561static bool 562format_check (void *msgid_descr, void *msgstr_descr, bool equality, 563 formatstring_error_logger_t error_logger, 564 const char *pretty_msgstr) 565{ 566 struct spec *spec1 = (struct spec *) msgid_descr; 567 struct spec *spec2 = (struct spec *) msgstr_descr; 568 bool err = false; 569 570 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 571 { 572 unsigned int i, j; 573 unsigned int n1 = spec1->numbered_arg_count; 574 unsigned int n2 = spec2->numbered_arg_count; 575 576 /* Check the argument names are the same. 577 Both arrays are sorted. We search for the first difference. */ 578 for (i = 0, j = 0; i < n1 || j < n2; ) 579 { 580 int cmp = (i >= n1 ? 1 : 581 j >= n2 ? -1 : 582 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 583 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 584 0); 585 586 if (cmp > 0) 587 { 588 if (error_logger) 589 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 590 spec2->numbered[j].number, pretty_msgstr); 591 err = true; 592 break; 593 } 594 else if (cmp < 0) 595 { 596 if (equality) 597 { 598 if (error_logger) 599 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 600 spec1->numbered[i].number, pretty_msgstr); 601 err = true; 602 break; 603 } 604 else 605 i++; 606 } 607 else 608 j++, i++; 609 } 610 /* Check the argument types are the same. */ 611 if (!err) 612 for (i = 0, j = 0; j < n2; ) 613 { 614 if (spec1->numbered[i].number == spec2->numbered[j].number) 615 { 616 if (spec1->numbered[i].type != spec2->numbered[j].type) 617 { 618 if (error_logger) 619 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 620 pretty_msgstr, spec2->numbered[j].number); 621 err = true; 622 break; 623 } 624 j++, i++; 625 } 626 else 627 i++; 628 } 629 } 630 631 return err; 632} 633 634 635struct formatstring_parser formatstring_boost = 636{ 637 format_parse, 638 format_free, 639 format_get_number_of_directives, 640 NULL, 641 format_check 642}; 643 644 645#ifdef TEST 646 647/* Test program: Print the argument list specification returned by 648 format_parse for strings read from standard input. */ 649 650#include <stdio.h> 651#include "getline.h" 652 653static void 654format_print (void *descr) 655{ 656 struct spec *spec = (struct spec *) descr; 657 unsigned int last; 658 unsigned int i; 659 660 if (spec == NULL) 661 { 662 printf ("INVALID"); 663 return; 664 } 665 666 printf ("("); 667 last = 1; 668 for (i = 0; i < spec->numbered_arg_count; i++) 669 { 670 unsigned int number = spec->numbered[i].number; 671 672 if (i > 0) 673 printf (" "); 674 if (number < last) 675 abort (); 676 for (; last < number; last++) 677 printf ("_ "); 678 switch (spec->numbered[i].type) 679 { 680 case FAT_INTEGER: 681 printf ("i"); 682 break; 683 case FAT_DOUBLE: 684 printf ("f"); 685 break; 686 case FAT_CHAR: 687 printf ("c"); 688 break; 689 case FAT_POINTER: 690 printf ("p"); 691 break; 692 case FAT_ANY: 693 printf ("*"); 694 break; 695 default: 696 abort (); 697 } 698 last = number + 1; 699 } 700 printf (")"); 701} 702 703int 704main () 705{ 706 for (;;) 707 { 708 char *line = NULL; 709 size_t line_size = 0; 710 int line_len; 711 char *invalid_reason; 712 void *descr; 713 714 line_len = getline (&line, &line_size, stdin); 715 if (line_len < 0) 716 break; 717 if (line_len > 0 && line[line_len - 1] == '\n') 718 line[--line_len] = '\0'; 719 720 invalid_reason = NULL; 721 descr = format_parse (line, false, &invalid_reason); 722 723 format_print (descr); 724 printf ("\n"); 725 if (descr == NULL) 726 printf ("%s\n", invalid_reason); 727 728 free (invalid_reason); 729 free (line); 730 } 731 732 return 0; 733} 734 735/* 736 * For Emacs M-x compile 737 * Local Variables: 738 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-boost.c ../lib/libgettextlib.la" 739 * End: 740 */ 741 742#endif /* TEST */ 743 744