1/* Python format strings. 2 Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2001. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22#include <stdbool.h> 23#include <stdlib.h> 24#include <string.h> 25 26#include "format.h" 27#include "c-ctype.h" 28#include "xalloc.h" 29#include "xvasprintf.h" 30#include "format-invalid.h" 31#include "gettext.h" 32 33#define _(str) gettext (str) 34 35/* Python format strings are described in 36 Python Library reference 37 2. Built-in Types, Exceptions and Functions 38 2.1. Built-in Types 39 2.1.5. Sequence Types 40 2.1.5.2. String Formatting Operations 41 Any string or Unicode string can act as format string via the '%' operator, 42 implemented in stringobject.c and unicodeobject.c. 43 A directive 44 - starts with '%' 45 - is optionally followed by '(ident)' where ident is any sequence of 46 characters with balanced left and right parentheses, 47 - is optionally followed by any of the characters '-' (left justification), 48 '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a 49 flag, 50 - is optionally followed by a width specification: '*' (reads an argument) 51 or a nonempty digit sequence, 52 - is optionally followed by '.' and a precision specification: '*' (reads 53 an argument) or a nonempty digit sequence, 54 - is optionally followed by a size specifier, one of 'h' 'l' 'L'. 55 - is finished by a specifier 56 - '%', that needs no argument, 57 - 'c', that needs a character argument, 58 - 's', 'r', that need a string argument, 59 - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument, 60 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument. 61 Use of '(ident)' and use of unnamed argument specifications are exclusive, 62 because the first requires a mapping as argument, while the second requires 63 a tuple as argument. 64 */ 65 66enum format_arg_type 67{ 68 FAT_NONE, 69 FAT_ANY, 70 FAT_CHARACTER, 71 FAT_STRING, 72 FAT_INTEGER, 73 FAT_FLOAT 74}; 75 76struct named_arg 77{ 78 char *name; 79 enum format_arg_type type; 80}; 81 82struct unnamed_arg 83{ 84 enum format_arg_type type; 85}; 86 87struct spec 88{ 89 unsigned int directives; 90 unsigned int named_arg_count; 91 unsigned int unnamed_arg_count; 92 unsigned int allocated; 93 struct named_arg *named; 94 struct unnamed_arg *unnamed; 95}; 96 97/* Locale independent test for a decimal digit. 98 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 99 <ctype.h> isdigit must be an 'unsigned char'.) */ 100#undef isdigit 101#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 102 103 104static int 105named_arg_compare (const void *p1, const void *p2) 106{ 107 return strcmp (((const struct named_arg *) p1)->name, 108 ((const struct named_arg *) p2)->name); 109} 110 111#define INVALID_MIXES_NAMED_UNNAMED() \ 112 xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications.")) 113 114static void * 115format_parse (const char *format, bool translated, char *fdi, 116 char **invalid_reason) 117{ 118 const char *const format_start = format; 119 struct spec spec; 120 struct spec *result; 121 122 spec.directives = 0; 123 spec.named_arg_count = 0; 124 spec.unnamed_arg_count = 0; 125 spec.allocated = 0; 126 spec.named = NULL; 127 spec.unnamed = NULL; 128 129 for (; *format != '\0';) 130 if (*format++ == '%') 131 { 132 /* A directive. */ 133 char *name = NULL; 134 enum format_arg_type type; 135 136 FDI_SET (format - 1, FMTDIR_START); 137 spec.directives++; 138 139 if (*format == '(') 140 { 141 unsigned int depth; 142 const char *name_start; 143 const char *name_end; 144 size_t n; 145 146 name_start = ++format; 147 depth = 0; 148 for (; *format != '\0'; format++) 149 { 150 if (*format == '(') 151 depth++; 152 else if (*format == ')') 153 { 154 if (depth == 0) 155 break; 156 else 157 depth--; 158 } 159 } 160 if (*format == '\0') 161 { 162 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 163 FDI_SET (format - 1, FMTDIR_ERROR); 164 goto bad_format; 165 } 166 name_end = format++; 167 168 n = name_end - name_start; 169 name = XNMALLOC (n + 1, char); 170 memcpy (name, name_start, n); 171 name[n] = '\0'; 172 } 173 174 while (*format == '-' || *format == '+' || *format == ' ' 175 || *format == '#' || *format == '0') 176 format++; 177 178 if (*format == '*') 179 { 180 format++; 181 182 /* Named and unnamed specifications are exclusive. */ 183 if (spec.named_arg_count > 0) 184 { 185 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED (); 186 FDI_SET (format - 1, FMTDIR_ERROR); 187 goto bad_format; 188 } 189 190 if (spec.allocated == spec.unnamed_arg_count) 191 { 192 spec.allocated = 2 * spec.allocated + 1; 193 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg)); 194 } 195 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER; 196 spec.unnamed_arg_count++; 197 } 198 else if (isdigit (*format)) 199 { 200 do format++; while (isdigit (*format)); 201 } 202 203 if (*format == '.') 204 { 205 format++; 206 207 if (*format == '*') 208 { 209 format++; 210 211 /* Named and unnamed specifications are exclusive. */ 212 if (spec.named_arg_count > 0) 213 { 214 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED (); 215 FDI_SET (format - 1, FMTDIR_ERROR); 216 goto bad_format; 217 } 218 219 if (spec.allocated == spec.unnamed_arg_count) 220 { 221 spec.allocated = 2 * spec.allocated + 1; 222 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg)); 223 } 224 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER; 225 spec.unnamed_arg_count++; 226 } 227 else if (isdigit (*format)) 228 { 229 do format++; while (isdigit (*format)); 230 } 231 } 232 233 if (*format == 'h' || *format == 'l' || *format == 'L') 234 format++; 235 236 switch (*format) 237 { 238 case '%': 239 type = FAT_ANY; 240 break; 241 case 'c': 242 type = FAT_CHARACTER; 243 break; 244 case 's': case 'r': 245 type = FAT_STRING; 246 break; 247 case 'i': case 'd': case 'u': case 'o': case 'x': case 'X': 248 type = FAT_INTEGER; 249 break; 250 case 'e': case 'E': case 'f': case 'g': case 'G': 251 type = FAT_FLOAT; 252 break; 253 default: 254 if (*format == '\0') 255 { 256 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 257 FDI_SET (format - 1, FMTDIR_ERROR); 258 } 259 else 260 { 261 *invalid_reason = 262 INVALID_CONVERSION_SPECIFIER (spec.directives, *format); 263 FDI_SET (format, FMTDIR_ERROR); 264 } 265 goto bad_format; 266 } 267 268 if (name != NULL) 269 { 270 /* Named argument. */ 271 272 /* Named and unnamed specifications are exclusive. */ 273 if (spec.unnamed_arg_count > 0) 274 { 275 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED (); 276 FDI_SET (format, FMTDIR_ERROR); 277 goto bad_format; 278 } 279 280 if (spec.allocated == spec.named_arg_count) 281 { 282 spec.allocated = 2 * spec.allocated + 1; 283 spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg)); 284 } 285 spec.named[spec.named_arg_count].name = name; 286 spec.named[spec.named_arg_count].type = type; 287 spec.named_arg_count++; 288 } 289 else if (*format != '%') 290 { 291 /* Unnamed argument. */ 292 293 /* Named and unnamed specifications are exclusive. */ 294 if (spec.named_arg_count > 0) 295 { 296 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED (); 297 FDI_SET (format, FMTDIR_ERROR); 298 goto bad_format; 299 } 300 301 if (spec.allocated == spec.unnamed_arg_count) 302 { 303 spec.allocated = 2 * spec.allocated + 1; 304 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg)); 305 } 306 spec.unnamed[spec.unnamed_arg_count].type = type; 307 spec.unnamed_arg_count++; 308 } 309 310 FDI_SET (format, FMTDIR_END); 311 312 format++; 313 } 314 315 /* Sort the named argument array, and eliminate duplicates. */ 316 if (spec.named_arg_count > 1) 317 { 318 unsigned int i, j; 319 bool err; 320 321 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), 322 named_arg_compare); 323 324 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 325 err = false; 326 for (i = j = 0; i < spec.named_arg_count; i++) 327 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) 328 { 329 enum format_arg_type type1 = spec.named[i].type; 330 enum format_arg_type type2 = spec.named[j-1].type; 331 enum format_arg_type type_both; 332 333 if (type1 == type2 || type2 == FAT_ANY) 334 type_both = type1; 335 else if (type1 == FAT_ANY) 336 type_both = type2; 337 else 338 { 339 /* Incompatible types. */ 340 type_both = FAT_NONE; 341 if (!err) 342 *invalid_reason = 343 xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name); 344 err = true; 345 } 346 347 spec.named[j-1].type = type_both; 348 free (spec.named[i].name); 349 } 350 else 351 { 352 if (j < i) 353 { 354 spec.named[j].name = spec.named[i].name; 355 spec.named[j].type = spec.named[i].type; 356 } 357 j++; 358 } 359 spec.named_arg_count = j; 360 if (err) 361 /* *invalid_reason has already been set above. */ 362 goto bad_format; 363 } 364 365 result = XMALLOC (struct spec); 366 *result = spec; 367 return result; 368 369 bad_format: 370 if (spec.named != NULL) 371 { 372 unsigned int i; 373 for (i = 0; i < spec.named_arg_count; i++) 374 free (spec.named[i].name); 375 free (spec.named); 376 } 377 if (spec.unnamed != NULL) 378 free (spec.unnamed); 379 return NULL; 380} 381 382static void 383format_free (void *descr) 384{ 385 struct spec *spec = (struct spec *) descr; 386 387 if (spec->named != NULL) 388 { 389 unsigned int i; 390 for (i = 0; i < spec->named_arg_count; i++) 391 free (spec->named[i].name); 392 free (spec->named); 393 } 394 if (spec->unnamed != NULL) 395 free (spec->unnamed); 396 free (spec); 397} 398 399static int 400format_get_number_of_directives (void *descr) 401{ 402 struct spec *spec = (struct spec *) descr; 403 404 return spec->directives; 405} 406 407static bool 408format_check (void *msgid_descr, void *msgstr_descr, bool equality, 409 formatstring_error_logger_t error_logger, 410 const char *pretty_msgstr) 411{ 412 struct spec *spec1 = (struct spec *) msgid_descr; 413 struct spec *spec2 = (struct spec *) msgstr_descr; 414 bool err = false; 415 416 if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0) 417 { 418 if (error_logger) 419 error_logger (_("format specifications in 'msgid' expect a mapping, those in '%s' expect a tuple"), 420 pretty_msgstr); 421 err = true; 422 } 423 else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0) 424 { 425 if (error_logger) 426 error_logger (_("format specifications in 'msgid' expect a tuple, those in '%s' expect a mapping"), 427 pretty_msgstr); 428 err = true; 429 } 430 else 431 { 432 if (spec1->named_arg_count + spec2->named_arg_count > 0) 433 { 434 unsigned int i, j; 435 unsigned int n1 = spec1->named_arg_count; 436 unsigned int n2 = spec2->named_arg_count; 437 438 /* Check the argument names are the same. 439 Both arrays are sorted. We search for the first difference. */ 440 for (i = 0, j = 0; i < n1 || j < n2; ) 441 { 442 int cmp = (i >= n1 ? 1 : 443 j >= n2 ? -1 : 444 strcmp (spec1->named[i].name, spec2->named[j].name)); 445 446 if (cmp > 0) 447 { 448 if (error_logger) 449 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"), 450 spec2->named[j].name, pretty_msgstr); 451 err = true; 452 break; 453 } 454 else if (cmp < 0) 455 { 456 if (equality) 457 { 458 if (error_logger) 459 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), 460 spec1->named[i].name, pretty_msgstr); 461 err = true; 462 break; 463 } 464 else 465 i++; 466 } 467 else 468 j++, i++; 469 } 470 /* Check the argument types are the same. */ 471 if (!err) 472 for (i = 0, j = 0; j < n2; ) 473 { 474 if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0) 475 { 476 if (spec1->named[i].type != spec2->named[j].type) 477 { 478 if (error_logger) 479 error_logger (_("format specifications in 'msgid' and '%s' for argument '%s' are not the same"), 480 pretty_msgstr, spec2->named[j].name); 481 err = true; 482 break; 483 } 484 j++, i++; 485 } 486 else 487 i++; 488 } 489 } 490 491 if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0) 492 { 493 unsigned int i; 494 495 /* Check the argument types are the same. */ 496 if (equality 497 ? spec1->unnamed_arg_count != spec2->unnamed_arg_count 498 : spec1->unnamed_arg_count < spec2->unnamed_arg_count) 499 { 500 if (error_logger) 501 error_logger (_("number of format specifications in 'msgid' and '%s' does not match"), 502 pretty_msgstr); 503 err = true; 504 } 505 else 506 for (i = 0; i < spec2->unnamed_arg_count; i++) 507 if (spec1->unnamed[i].type != spec2->unnamed[i].type) 508 { 509 if (error_logger) 510 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 511 pretty_msgstr, i + 1); 512 err = true; 513 } 514 } 515 } 516 517 return err; 518} 519 520 521struct formatstring_parser formatstring_python = 522{ 523 format_parse, 524 format_free, 525 format_get_number_of_directives, 526 NULL, 527 format_check 528}; 529 530 531unsigned int 532get_python_format_unnamed_arg_count (const char *string) 533{ 534 /* Parse the format string. */ 535 char *invalid_reason = NULL; 536 struct spec *descr = 537 (struct spec *) format_parse (string, false, NULL, &invalid_reason); 538 539 if (descr != NULL) 540 { 541 unsigned int result = descr->unnamed_arg_count; 542 543 format_free (descr); 544 return result; 545 } 546 else 547 { 548 free (invalid_reason); 549 return 0; 550 } 551} 552 553 554#ifdef TEST 555 556/* Test program: Print the argument list specification returned by 557 format_parse for strings read from standard input. */ 558 559#include <stdio.h> 560 561static void 562format_print (void *descr) 563{ 564 struct spec *spec = (struct spec *) descr; 565 unsigned int i; 566 567 if (spec == NULL) 568 { 569 printf ("INVALID"); 570 return; 571 } 572 573 if (spec->named_arg_count > 0) 574 { 575 if (spec->unnamed_arg_count > 0) 576 abort (); 577 578 printf ("{"); 579 for (i = 0; i < spec->named_arg_count; i++) 580 { 581 if (i > 0) 582 printf (", "); 583 printf ("'%s':", spec->named[i].name); 584 switch (spec->named[i].type) 585 { 586 case FAT_ANY: 587 printf ("*"); 588 break; 589 case FAT_CHARACTER: 590 printf ("c"); 591 break; 592 case FAT_STRING: 593 printf ("s"); 594 break; 595 case FAT_INTEGER: 596 printf ("i"); 597 break; 598 case FAT_FLOAT: 599 printf ("f"); 600 break; 601 default: 602 abort (); 603 } 604 } 605 printf ("}"); 606 } 607 else 608 { 609 printf ("("); 610 for (i = 0; i < spec->unnamed_arg_count; i++) 611 { 612 if (i > 0) 613 printf (" "); 614 switch (spec->unnamed[i].type) 615 { 616 case FAT_ANY: 617 printf ("*"); 618 break; 619 case FAT_CHARACTER: 620 printf ("c"); 621 break; 622 case FAT_STRING: 623 printf ("s"); 624 break; 625 case FAT_INTEGER: 626 printf ("i"); 627 break; 628 case FAT_FLOAT: 629 printf ("f"); 630 break; 631 default: 632 abort (); 633 } 634 } 635 printf (")"); 636 } 637} 638 639int 640main () 641{ 642 for (;;) 643 { 644 char *line = NULL; 645 size_t line_size = 0; 646 int line_len; 647 char *invalid_reason; 648 void *descr; 649 650 line_len = getline (&line, &line_size, stdin); 651 if (line_len < 0) 652 break; 653 if (line_len > 0 && line[line_len - 1] == '\n') 654 line[--line_len] = '\0'; 655 656 invalid_reason = NULL; 657 descr = format_parse (line, false, NULL, &invalid_reason); 658 659 format_print (descr); 660 printf ("\n"); 661 if (descr == NULL) 662 printf ("%s\n", invalid_reason); 663 664 free (invalid_reason); 665 free (line); 666 } 667 668 return 0; 669} 670 671/* 672 * For Emacs M-x compile 673 * Local Variables: 674 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../gnulib-lib/libgettextlib.la" 675 * End: 676 */ 677 678#endif /* TEST */ 679