1/* Perl format strings. 2 Copyright (C) 2004, 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2003. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22#include <stdbool.h> 23#include <stdlib.h> 24 25#include "format.h" 26#include "c-ctype.h" 27#include "xalloc.h" 28#include "xvasprintf.h" 29#include "format-invalid.h" 30#include "gettext.h" 31 32#define _(str) gettext (str) 33 34/* Perl format strings are implemented in function Perl_sv_vcatpvfn in 35 perl-5.8.0/sv.c. 36 A directive 37 - starts with '%' or '%m$' where m is a positive integer starting with a 38 nonzero digit, 39 - is optionally followed by any of the characters '#', '0', '-', ' ', '+', 40 each of which acts as a flag, 41 - is optionally followed by a vector specification: 'v' or '*v' (reads an 42 argument) or '*m$v' where m is a positive integer starting with a nonzero 43 digit, 44 - is optionally followed by a width specification: '*' (reads an argument) 45 or '*m$' where m is a positive integer starting with a nonzero digit or 46 a nonempty digit sequence starting with a nonzero digit, 47 - is optionally followed by '.' and a precision specification: '*' (reads 48 an argument) or '*m$' where m is a positive integer starting with a 49 nonzero digit or a digit sequence, 50 - is optionally followed by a size specifier, one of 'h' 'l' 'll' 'L' 'q' 51 'V' 'I32' 'I64' 'I', 52 - is finished by a specifier 53 - '%', that needs no argument, 54 - 'c', that needs a small integer argument, 55 - 's', that needs a string argument, 56 - '_', that needs a scalar vector argument, 57 - 'p', that needs a pointer argument, 58 - 'i', 'd', 'D', that need an integer argument, 59 - 'u', 'U', 'b', 'o', 'O', 'x', 'X', that need an unsigned integer 60 argument, 61 - 'e', 'E', 'f', 'F', 'g', 'G', that need a floating-point argument, 62 - 'n', that needs a pointer to integer. 63 So there can be numbered argument specifications: 64 - '%m$' for the format string, 65 - '*m$v' for the vector, 66 - '*m$' for the width, 67 - '.*m$' for the precision. 68 Numbered and unnumbered argument specifications can be used in the same 69 string. The effect of '%m$' is to take argument number m, without affecting 70 the current argument number. The current argument number is incremented 71 after processing a directive with an unnumbered argument specification. 72 */ 73 74enum format_arg_type 75{ 76 FAT_NONE = 0, 77 /* Basic types */ 78 FAT_INTEGER = 1, 79 FAT_DOUBLE = 2, 80 FAT_CHAR = 3, 81 FAT_STRING = 4, 82 FAT_SCALAR_VECTOR = 5, 83 FAT_POINTER = 6, 84 FAT_COUNT_POINTER = 7, 85 /* Flags */ 86 FAT_UNSIGNED = 1 << 3, 87 FAT_SIZE_SHORT = 1 << 4, 88 FAT_SIZE_V = 2 << 4, 89 FAT_SIZE_PTR = 3 << 4, 90 FAT_SIZE_LONG = 4 << 4, 91 FAT_SIZE_LONGLONG = 5 << 4, 92 /* Bitmasks */ 93 FAT_SIZE_MASK = (FAT_SIZE_SHORT | FAT_SIZE_V | FAT_SIZE_PTR 94 | FAT_SIZE_LONG | FAT_SIZE_LONGLONG) 95}; 96#ifdef __cplusplus 97typedef int format_arg_type_t; 98#else 99typedef enum format_arg_type format_arg_type_t; 100#endif 101 102struct numbered_arg 103{ 104 unsigned int number; 105 format_arg_type_t type; 106}; 107 108struct spec 109{ 110 unsigned int directives; 111 unsigned int numbered_arg_count; 112 unsigned int allocated; 113 struct numbered_arg *numbered; 114}; 115 116/* Locale independent test for a decimal digit. 117 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 118 <ctype.h> isdigit must be an 'unsigned char'.) */ 119#undef isdigit 120#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 121 122/* Locale independent test for a nonzero decimal digit. */ 123#define isnonzerodigit(c) ((unsigned int) ((c) - '1') < 9) 124 125 126static int 127numbered_arg_compare (const void *p1, const void *p2) 128{ 129 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 130 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 131 132 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 133} 134 135static void * 136format_parse (const char *format, bool translated, char *fdi, 137 char **invalid_reason) 138{ 139 const char *const format_start = format; 140 unsigned int directives; 141 unsigned int numbered_arg_count; 142 unsigned int allocated; 143 struct numbered_arg *numbered; 144 unsigned int unnumbered_arg_count; 145 struct spec *result; 146 147 directives = 0; 148 numbered_arg_count = 0; 149 unnumbered_arg_count = 0; 150 allocated = 0; 151 numbered = NULL; 152 153 for (; *format != '\0';) 154 if (*format++ == '%') 155 { 156 /* A directive. */ 157 unsigned int number = 0; 158 bool vectorize = false; 159 format_arg_type_t type; 160 format_arg_type_t size; 161 162 FDI_SET (format - 1, FMTDIR_START); 163 directives++; 164 165 if (isnonzerodigit (*format)) 166 { 167 const char *f = format; 168 unsigned int m = 0; 169 170 do 171 { 172 m = 10 * m + (*f - '0'); 173 f++; 174 } 175 while (isdigit (*f)); 176 177 if (*f == '$') 178 { 179 number = m; 180 format = ++f; 181 } 182 } 183 184 /* Parse flags. */ 185 while (*format == ' ' || *format == '+' || *format == '-' 186 || *format == '#' || *format == '0') 187 format++; 188 189 /* Parse vector. */ 190 if (*format == 'v') 191 { 192 format++; 193 vectorize = true; 194 } 195 else if (*format == '*') 196 { 197 const char *f = format; 198 199 f++; 200 if (*f == 'v') 201 { 202 format = ++f; 203 vectorize = true; 204 205 /* Unnumbered argument. */ 206 if (allocated == numbered_arg_count) 207 { 208 allocated = 2 * allocated + 1; 209 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 210 } 211 numbered[numbered_arg_count].number = ++unnumbered_arg_count; 212 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */ 213 numbered_arg_count++; 214 } 215 else if (isnonzerodigit (*f)) 216 { 217 unsigned int m = 0; 218 219 do 220 { 221 m = 10 * m + (*f - '0'); 222 f++; 223 } 224 while (isdigit (*f)); 225 226 if (*f == '$') 227 { 228 f++; 229 if (*f == 'v') 230 { 231 unsigned int vector_number = m; 232 233 format = ++f; 234 vectorize = true; 235 236 /* Numbered argument. */ 237 /* Note: As of perl-5.8.0, this is not correctly 238 implemented in perl's sv.c. */ 239 if (allocated == numbered_arg_count) 240 { 241 allocated = 2 * allocated + 1; 242 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 243 } 244 numbered[numbered_arg_count].number = vector_number; 245 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */ 246 numbered_arg_count++; 247 } 248 } 249 } 250 } 251 252 if (vectorize) 253 { 254 /* Numbered or unnumbered argument. */ 255 if (allocated == numbered_arg_count) 256 { 257 allocated = 2 * allocated + 1; 258 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 259 } 260 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count); 261 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; 262 numbered_arg_count++; 263 } 264 265 /* Parse width. */ 266 if (*format == '*') 267 { 268 unsigned int width_number = 0; 269 270 format++; 271 272 if (isnonzerodigit (*format)) 273 { 274 const char *f = format; 275 unsigned int m = 0; 276 277 do 278 { 279 m = 10 * m + (*f - '0'); 280 f++; 281 } 282 while (isdigit (*f)); 283 284 if (*f == '$') 285 { 286 width_number = m; 287 format = ++f; 288 } 289 } 290 291 /* Numbered or unnumbered argument. */ 292 /* Note: As of perl-5.8.0, this is not correctly 293 implemented in perl's sv.c. */ 294 if (allocated == numbered_arg_count) 295 { 296 allocated = 2 * allocated + 1; 297 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 298 } 299 numbered[numbered_arg_count].number = (width_number ? width_number : ++unnumbered_arg_count); 300 numbered[numbered_arg_count].type = FAT_INTEGER; 301 numbered_arg_count++; 302 } 303 else if (isnonzerodigit (*format)) 304 { 305 do format++; while (isdigit (*format)); 306 } 307 308 /* Parse precision. */ 309 if (*format == '.') 310 { 311 format++; 312 313 if (*format == '*') 314 { 315 unsigned int precision_number = 0; 316 317 format++; 318 319 if (isnonzerodigit (*format)) 320 { 321 const char *f = format; 322 unsigned int m = 0; 323 324 do 325 { 326 m = 10 * m + (*f - '0'); 327 f++; 328 } 329 while (isdigit (*f)); 330 331 if (*f == '$') 332 { 333 precision_number = m; 334 format = ++f; 335 } 336 } 337 338 /* Numbered or unnumbered argument. */ 339 if (allocated == numbered_arg_count) 340 { 341 allocated = 2 * allocated + 1; 342 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 343 } 344 numbered[numbered_arg_count].number = (precision_number ? precision_number : ++unnumbered_arg_count); 345 numbered[numbered_arg_count].type = FAT_INTEGER; 346 numbered_arg_count++; 347 } 348 else 349 { 350 while (isdigit (*format)) format++; 351 } 352 } 353 354 /* Parse size. */ 355 size = 0; 356 if (*format == 'h') 357 { 358 size = FAT_SIZE_SHORT; 359 format++; 360 } 361 else if (*format == 'l') 362 { 363 if (format[1] == 'l') 364 { 365 size = FAT_SIZE_LONGLONG; 366 format += 2; 367 } 368 else 369 { 370 size = FAT_SIZE_LONG; 371 format++; 372 } 373 } 374 else if (*format == 'L' || *format == 'q') 375 { 376 size = FAT_SIZE_LONGLONG; 377 format++; 378 } 379 else if (*format == 'V') 380 { 381 size = FAT_SIZE_V; 382 format++; 383 } 384 else if (*format == 'I') 385 { 386 if (format[1] == '6' && format[2] == '4') 387 { 388 size = FAT_SIZE_LONGLONG; 389 format += 3; 390 } 391 else if (format[1] == '3' && format[2] == '2') 392 { 393 size = 0; /* FAT_SIZE_INT */ 394 format += 3; 395 } 396 else 397 { 398 size = FAT_SIZE_PTR; 399 format++; 400 } 401 } 402 403 switch (*format) 404 { 405 case '%': 406 type = FAT_NONE; 407 break; 408 case 'c': 409 type = FAT_CHAR; 410 break; 411 case 's': 412 type = FAT_STRING; 413 break; 414 case '_': 415 type = FAT_SCALAR_VECTOR; 416 break; 417 case 'D': 418 type = FAT_INTEGER | FAT_SIZE_V; 419 break; 420 case 'i': case 'd': 421 type = FAT_INTEGER | size; 422 break; 423 case 'U': case 'O': 424 type = FAT_INTEGER | FAT_UNSIGNED | FAT_SIZE_V; 425 break; 426 case 'u': case 'b': case 'o': case 'x': case 'X': 427 type = FAT_INTEGER | FAT_UNSIGNED | size; 428 break; 429 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': 430 if (size == FAT_SIZE_SHORT || size == FAT_SIZE_LONG) 431 { 432 *invalid_reason = 433 xasprintf (_("In the directive number %u, the size specifier is incompatible with the conversion specifier '%c'."), directives, *format); 434 FDI_SET (format, FMTDIR_ERROR); 435 goto bad_format; 436 } 437 type = FAT_DOUBLE | size; 438 break; 439 case 'p': 440 type = FAT_POINTER; 441 break; 442 case 'n': 443 type = FAT_COUNT_POINTER | size; 444 break; 445 default: 446 if (*format == '\0') 447 { 448 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 449 FDI_SET (format - 1, FMTDIR_ERROR); 450 } 451 else 452 { 453 *invalid_reason = 454 INVALID_CONVERSION_SPECIFIER (directives, *format); 455 FDI_SET (format, FMTDIR_ERROR); 456 } 457 goto bad_format; 458 } 459 460 if (type != FAT_NONE && !vectorize) 461 { 462 /* Numbered or unnumbered argument. */ 463 if (allocated == numbered_arg_count) 464 { 465 allocated = 2 * allocated + 1; 466 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 467 } 468 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count); 469 numbered[numbered_arg_count].type = type; 470 numbered_arg_count++; 471 } 472 473 FDI_SET (format, FMTDIR_END); 474 475 format++; 476 } 477 478 /* Sort the numbered argument array, and eliminate duplicates. */ 479 if (numbered_arg_count > 1) 480 { 481 unsigned int i, j; 482 bool err; 483 484 qsort (numbered, numbered_arg_count, 485 sizeof (struct numbered_arg), numbered_arg_compare); 486 487 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 488 err = false; 489 for (i = j = 0; i < numbered_arg_count; i++) 490 if (j > 0 && numbered[i].number == numbered[j-1].number) 491 { 492 format_arg_type_t type1 = numbered[i].type; 493 format_arg_type_t type2 = numbered[j-1].type; 494 format_arg_type_t type_both; 495 496 if (type1 == type2) 497 type_both = type1; 498 else 499 { 500 /* Incompatible types. */ 501 type_both = FAT_NONE; 502 if (!err) 503 *invalid_reason = 504 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number); 505 err = true; 506 } 507 508 numbered[j-1].type = type_both; 509 } 510 else 511 { 512 if (j < i) 513 { 514 numbered[j].number = numbered[i].number; 515 numbered[j].type = numbered[i].type; 516 } 517 j++; 518 } 519 numbered_arg_count = j; 520 if (err) 521 /* *invalid_reason has already been set above. */ 522 goto bad_format; 523 } 524 525 result = XMALLOC (struct spec); 526 result->directives = directives; 527 result->numbered_arg_count = numbered_arg_count; 528 result->allocated = allocated; 529 result->numbered = numbered; 530 return result; 531 532 bad_format: 533 if (numbered != NULL) 534 free (numbered); 535 return NULL; 536} 537 538static void 539format_free (void *descr) 540{ 541 struct spec *spec = (struct spec *) descr; 542 543 if (spec->numbered != NULL) 544 free (spec->numbered); 545 free (spec); 546} 547 548static int 549format_get_number_of_directives (void *descr) 550{ 551 struct spec *spec = (struct spec *) descr; 552 553 return spec->directives; 554} 555 556static bool 557format_check (void *msgid_descr, void *msgstr_descr, bool equality, 558 formatstring_error_logger_t error_logger, 559 const char *pretty_msgstr) 560{ 561 struct spec *spec1 = (struct spec *) msgid_descr; 562 struct spec *spec2 = (struct spec *) msgstr_descr; 563 bool err = false; 564 565 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 566 { 567 unsigned int i, j; 568 unsigned int n1 = spec1->numbered_arg_count; 569 unsigned int n2 = spec2->numbered_arg_count; 570 571 /* Check the argument names are the same. 572 Both arrays are sorted. We search for the first difference. */ 573 for (i = 0, j = 0; i < n1 || j < n2; ) 574 { 575 int cmp = (i >= n1 ? 1 : 576 j >= n2 ? -1 : 577 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 578 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 579 0); 580 581 if (cmp > 0) 582 { 583 if (error_logger) 584 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 585 spec2->numbered[j].number, pretty_msgstr); 586 err = true; 587 break; 588 } 589 else if (cmp < 0) 590 { 591 if (equality) 592 { 593 if (error_logger) 594 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 595 spec1->numbered[i].number, pretty_msgstr); 596 err = true; 597 break; 598 } 599 else 600 i++; 601 } 602 else 603 j++, i++; 604 } 605 /* Check the argument types are the same. */ 606 if (!err) 607 for (i = 0, j = 0; j < n2; ) 608 { 609 if (spec1->numbered[i].number == spec2->numbered[j].number) 610 { 611 if (spec1->numbered[i].type != spec2->numbered[j].type) 612 { 613 if (error_logger) 614 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 615 pretty_msgstr, spec2->numbered[j].number); 616 err = true; 617 break; 618 } 619 j++, i++; 620 } 621 else 622 i++; 623 } 624 } 625 626 return err; 627} 628 629 630struct formatstring_parser formatstring_perl = 631{ 632 format_parse, 633 format_free, 634 format_get_number_of_directives, 635 NULL, 636 format_check 637}; 638 639 640#ifdef TEST 641 642/* Test program: Print the argument list specification returned by 643 format_parse for strings read from standard input. */ 644 645#include <stdio.h> 646 647static void 648format_print (void *descr) 649{ 650 struct spec *spec = (struct spec *) descr; 651 unsigned int last; 652 unsigned int i; 653 654 if (spec == NULL) 655 { 656 printf ("INVALID"); 657 return; 658 } 659 660 printf ("("); 661 last = 1; 662 for (i = 0; i < spec->numbered_arg_count; i++) 663 { 664 unsigned int number = spec->numbered[i].number; 665 666 if (i > 0) 667 printf (" "); 668 if (number < last) 669 abort (); 670 for (; last < number; last++) 671 printf ("_ "); 672 if (spec->numbered[i].type & FAT_UNSIGNED) 673 printf ("[unsigned]"); 674 switch (spec->numbered[i].type & FAT_SIZE_MASK) 675 { 676 case 0: 677 break; 678 case FAT_SIZE_SHORT: 679 printf ("[short]"); 680 break; 681 case FAT_SIZE_V: 682 printf ("[IV]"); 683 break; 684 case FAT_SIZE_PTR: 685 printf ("[PTR]"); 686 break; 687 case FAT_SIZE_LONG: 688 printf ("[long]"); 689 break; 690 case FAT_SIZE_LONGLONG: 691 printf ("[long long]"); 692 break; 693 default: 694 abort (); 695 } 696 switch (spec->numbered[i].type & ~(FAT_UNSIGNED | FAT_SIZE_MASK)) 697 { 698 case FAT_INTEGER: 699 printf ("i"); 700 break; 701 case FAT_DOUBLE: 702 printf ("f"); 703 break; 704 case FAT_CHAR: 705 printf ("c"); 706 break; 707 case FAT_STRING: 708 printf ("s"); 709 break; 710 case FAT_SCALAR_VECTOR: 711 printf ("sv"); 712 break; 713 case FAT_POINTER: 714 printf ("p"); 715 break; 716 case FAT_COUNT_POINTER: 717 printf ("n"); 718 break; 719 default: 720 abort (); 721 } 722 last = number + 1; 723 } 724 printf (")"); 725} 726 727int 728main () 729{ 730 for (;;) 731 { 732 char *line = NULL; 733 size_t line_size = 0; 734 int line_len; 735 char *invalid_reason; 736 void *descr; 737 738 line_len = getline (&line, &line_size, stdin); 739 if (line_len < 0) 740 break; 741 if (line_len > 0 && line[line_len - 1] == '\n') 742 line[--line_len] = '\0'; 743 744 invalid_reason = NULL; 745 descr = format_parse (line, false, NULL, &invalid_reason); 746 747 format_print (descr); 748 printf ("\n"); 749 if (descr == NULL) 750 printf ("%s\n", invalid_reason); 751 752 free (invalid_reason); 753 free (line); 754 } 755 756 return 0; 757} 758 759/* 760 * For Emacs M-x compile 761 * Local Variables: 762 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-perl.c ../gnulib-lib/libgettextlib.la" 763 * End: 764 */ 765 766#endif /* TEST */ 767