1/* Python format strings. 2 Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2001. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19#ifdef HAVE_CONFIG_H 20# include <config.h> 21#endif 22 23#include <stdbool.h> 24#include <stdlib.h> 25#include <string.h> 26 27#include "format.h" 28#include "c-ctype.h" 29#include "xalloc.h" 30#include "xvasprintf.h" 31#include "format-invalid.h" 32#include "gettext.h" 33 34#define _(str) gettext (str) 35 36/* Python format strings are described in 37 Python Library reference 38 2. Built-in Types, Exceptions and Functions 39 2.1. Built-in Types 40 2.1.5. Sequence Types 41 2.1.5.2. String Formatting Operations 42 Any string or Unicode string can act as format string via the '%' operator, 43 implemented in stringobject.c and unicodeobject.c. 44 A directive 45 - starts with '%' 46 - is optionally followed by '(ident)' where ident is any sequence of 47 characters with balanced left and right parentheses, 48 - is optionally followed by any of the characters '-' (left justification), 49 '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a 50 flag, 51 - is optionally followed by a width specification: '*' (reads an argument) 52 or a nonempty digit sequence, 53 - is optionally followed by '.' and a precision specification: '*' (reads 54 an argument) or a nonempty digit sequence, 55 - is optionally followed by a size specifier, one of 'h' 'l' 'L'. 56 - is finished by a specifier 57 - '%', that needs no argument, 58 - 'c', that needs a character argument, 59 - 's', 'r', that need a string argument, 60 - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument, 61 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument. 62 Use of '(ident)' and use of unnamed argument specifications are exclusive, 63 because the first requires a mapping as argument, while the second requires 64 a tuple as argument. 65 */ 66 67enum format_arg_type 68{ 69 FAT_NONE, 70 FAT_ANY, 71 FAT_CHARACTER, 72 FAT_STRING, 73 FAT_INTEGER, 74 FAT_FLOAT 75}; 76 77struct named_arg 78{ 79 char *name; 80 enum format_arg_type type; 81}; 82 83struct unnamed_arg 84{ 85 enum format_arg_type type; 86}; 87 88struct spec 89{ 90 unsigned int directives; 91 unsigned int named_arg_count; 92 unsigned int unnamed_arg_count; 93 unsigned int allocated; 94 struct named_arg *named; 95 struct unnamed_arg *unnamed; 96}; 97 98/* Locale independent test for a decimal digit. 99 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 100 <ctype.h> isdigit must be an 'unsigned char'.) */ 101#undef isdigit 102#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 103 104 105static int 106named_arg_compare (const void *p1, const void *p2) 107{ 108 return strcmp (((const struct named_arg *) p1)->name, 109 ((const struct named_arg *) p2)->name); 110} 111 112#define INVALID_MIXES_NAMED_UNNAMED() \ 113 xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications.")) 114 115static void * 116format_parse (const char *format, bool translated, char **invalid_reason) 117{ 118 struct spec spec; 119 struct spec *result; 120 121 spec.directives = 0; 122 spec.named_arg_count = 0; 123 spec.unnamed_arg_count = 0; 124 spec.allocated = 0; 125 spec.named = NULL; 126 spec.unnamed = NULL; 127 128 for (; *format != '\0';) 129 if (*format++ == '%') 130 { 131 /* A directive. */ 132 char *name = NULL; 133 enum format_arg_type type; 134 135 spec.directives++; 136 137 if (*format == '(') 138 { 139 unsigned int depth; 140 const char *name_start; 141 const char *name_end; 142 size_t n; 143 144 name_start = ++format; 145 depth = 0; 146 for (; *format != '\0'; format++) 147 { 148 if (*format == '(') 149 depth++; 150 else if (*format == ')') 151 { 152 if (depth == 0) 153 break; 154 else 155 depth--; 156 } 157 } 158 if (*format == '\0') 159 { 160 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 161 goto bad_format; 162 } 163 name_end = format++; 164 165 n = name_end - name_start; 166 name = (char *) xmalloc (n + 1); 167 memcpy (name, name_start, n); 168 name[n] = '\0'; 169 } 170 171 while (*format == '-' || *format == '+' || *format == ' ' 172 || *format == '#' || *format == '0') 173 format++; 174 175 if (*format == '*') 176 { 177 format++; 178 179 /* Named and unnamed specifications are exclusive. */ 180 if (spec.named_arg_count > 0) 181 { 182 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED (); 183 goto bad_format; 184 } 185 186 if (spec.allocated == spec.unnamed_arg_count) 187 { 188 spec.allocated = 2 * spec.allocated + 1; 189 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg)); 190 } 191 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER; 192 spec.unnamed_arg_count++; 193 } 194 else if (isdigit (*format)) 195 { 196 do format++; while (isdigit (*format)); 197 } 198 199 if (*format == '.') 200 { 201 format++; 202 203 if (*format == '*') 204 { 205 format++; 206 207 /* Named and unnamed specifications are exclusive. */ 208 if (spec.named_arg_count > 0) 209 { 210 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED (); 211 goto bad_format; 212 } 213 214 if (spec.allocated == spec.unnamed_arg_count) 215 { 216 spec.allocated = 2 * spec.allocated + 1; 217 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg)); 218 } 219 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER; 220 spec.unnamed_arg_count++; 221 } 222 else if (isdigit (*format)) 223 { 224 do format++; while (isdigit (*format)); 225 } 226 } 227 228 if (*format == 'h' || *format == 'l' || *format == 'L') 229 format++; 230 231 switch (*format) 232 { 233 case '%': 234 type = FAT_ANY; 235 break; 236 case 'c': 237 type = FAT_CHARACTER; 238 break; 239 case 's': case 'r': 240 type = FAT_STRING; 241 break; 242 case 'i': case 'd': case 'u': case 'o': case 'x': case 'X': 243 type = FAT_INTEGER; 244 break; 245 case 'e': case 'E': case 'f': case 'g': case 'G': 246 type = FAT_FLOAT; 247 break; 248 default: 249 *invalid_reason = 250 (*format == '\0' 251 ? INVALID_UNTERMINATED_DIRECTIVE () 252 : INVALID_CONVERSION_SPECIFIER (spec.directives, *format)); 253 goto bad_format; 254 } 255 256 if (name != NULL) 257 { 258 /* Named argument. */ 259 260 /* Named and unnamed specifications are exclusive. */ 261 if (spec.unnamed_arg_count > 0) 262 { 263 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED (); 264 goto bad_format; 265 } 266 267 if (spec.allocated == spec.named_arg_count) 268 { 269 spec.allocated = 2 * spec.allocated + 1; 270 spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg)); 271 } 272 spec.named[spec.named_arg_count].name = name; 273 spec.named[spec.named_arg_count].type = type; 274 spec.named_arg_count++; 275 } 276 else if (*format != '%') 277 { 278 /* Unnamed argument. */ 279 280 /* Named and unnamed specifications are exclusive. */ 281 if (spec.named_arg_count > 0) 282 { 283 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED (); 284 goto bad_format; 285 } 286 287 if (spec.allocated == spec.unnamed_arg_count) 288 { 289 spec.allocated = 2 * spec.allocated + 1; 290 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg)); 291 } 292 spec.unnamed[spec.unnamed_arg_count].type = type; 293 spec.unnamed_arg_count++; 294 } 295 296 format++; 297 } 298 299 /* Sort the named argument array, and eliminate duplicates. */ 300 if (spec.named_arg_count > 1) 301 { 302 unsigned int i, j; 303 bool err; 304 305 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), 306 named_arg_compare); 307 308 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 309 err = false; 310 for (i = j = 0; i < spec.named_arg_count; i++) 311 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) 312 { 313 enum format_arg_type type1 = spec.named[i].type; 314 enum format_arg_type type2 = spec.named[j-1].type; 315 enum format_arg_type type_both; 316 317 if (type1 == type2 || type2 == FAT_ANY) 318 type_both = type1; 319 else if (type1 == FAT_ANY) 320 type_both = type2; 321 else 322 { 323 /* Incompatible types. */ 324 type_both = FAT_NONE; 325 if (!err) 326 *invalid_reason = 327 xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name); 328 err = true; 329 } 330 331 spec.named[j-1].type = type_both; 332 free (spec.named[i].name); 333 } 334 else 335 { 336 if (j < i) 337 { 338 spec.named[j].name = spec.named[i].name; 339 spec.named[j].type = spec.named[i].type; 340 } 341 j++; 342 } 343 spec.named_arg_count = j; 344 if (err) 345 /* *invalid_reason has already been set above. */ 346 goto bad_format; 347 } 348 349 result = (struct spec *) xmalloc (sizeof (struct spec)); 350 *result = spec; 351 return result; 352 353 bad_format: 354 if (spec.named != NULL) 355 { 356 unsigned int i; 357 for (i = 0; i < spec.named_arg_count; i++) 358 free (spec.named[i].name); 359 free (spec.named); 360 } 361 if (spec.unnamed != NULL) 362 free (spec.unnamed); 363 return NULL; 364} 365 366static void 367format_free (void *descr) 368{ 369 struct spec *spec = (struct spec *) descr; 370 371 if (spec->named != NULL) 372 { 373 unsigned int i; 374 for (i = 0; i < spec->named_arg_count; i++) 375 free (spec->named[i].name); 376 free (spec->named); 377 } 378 if (spec->unnamed != NULL) 379 free (spec->unnamed); 380 free (spec); 381} 382 383static int 384format_get_number_of_directives (void *descr) 385{ 386 struct spec *spec = (struct spec *) descr; 387 388 return spec->directives; 389} 390 391static bool 392format_check (void *msgid_descr, void *msgstr_descr, bool equality, 393 formatstring_error_logger_t error_logger, 394 const char *pretty_msgstr) 395{ 396 struct spec *spec1 = (struct spec *) msgid_descr; 397 struct spec *spec2 = (struct spec *) msgstr_descr; 398 bool err = false; 399 400 if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0) 401 { 402 if (error_logger) 403 error_logger (_("format specifications in 'msgid' expect a mapping, those in '%s' expect a tuple"), 404 pretty_msgstr); 405 err = true; 406 } 407 else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0) 408 { 409 if (error_logger) 410 error_logger (_("format specifications in 'msgid' expect a tuple, those in '%s' expect a mapping"), 411 pretty_msgstr); 412 err = true; 413 } 414 else 415 { 416 if (spec1->named_arg_count + spec2->named_arg_count > 0) 417 { 418 unsigned int i, j; 419 unsigned int n1 = spec1->named_arg_count; 420 unsigned int n2 = spec2->named_arg_count; 421 422 /* Check the argument names are the same. 423 Both arrays are sorted. We search for the first difference. */ 424 for (i = 0, j = 0; i < n1 || j < n2; ) 425 { 426 int cmp = (i >= n1 ? 1 : 427 j >= n2 ? -1 : 428 strcmp (spec1->named[i].name, spec2->named[j].name)); 429 430 if (cmp > 0) 431 { 432 if (error_logger) 433 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"), 434 spec2->named[j].name, pretty_msgstr); 435 err = true; 436 break; 437 } 438 else if (cmp < 0) 439 { 440 if (equality) 441 { 442 if (error_logger) 443 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), 444 spec1->named[i].name, pretty_msgstr); 445 err = true; 446 break; 447 } 448 else 449 i++; 450 } 451 else 452 j++, i++; 453 } 454 /* Check the argument types are the same. */ 455 if (!err) 456 for (i = 0, j = 0; j < n2; ) 457 { 458 if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0) 459 { 460 if (spec1->named[i].type != spec2->named[j].type) 461 { 462 if (error_logger) 463 error_logger (_("format specifications in 'msgid' and '%s' for argument '%s' are not the same"), 464 pretty_msgstr, spec2->named[j].name); 465 err = true; 466 break; 467 } 468 j++, i++; 469 } 470 else 471 i++; 472 } 473 } 474 475 if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0) 476 { 477 unsigned int i; 478 479 /* Check the argument types are the same. */ 480 if (equality 481 ? spec1->unnamed_arg_count != spec2->unnamed_arg_count 482 : spec1->unnamed_arg_count < spec2->unnamed_arg_count) 483 { 484 if (error_logger) 485 error_logger (_("number of format specifications in 'msgid' and '%s' does not match"), 486 pretty_msgstr); 487 err = true; 488 } 489 else 490 for (i = 0; i < spec2->unnamed_arg_count; i++) 491 if (spec1->unnamed[i].type != spec2->unnamed[i].type) 492 { 493 if (error_logger) 494 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 495 pretty_msgstr, i + 1); 496 err = true; 497 } 498 } 499 } 500 501 return err; 502} 503 504 505struct formatstring_parser formatstring_python = 506{ 507 format_parse, 508 format_free, 509 format_get_number_of_directives, 510 NULL, 511 format_check 512}; 513 514 515unsigned int 516get_python_format_unnamed_arg_count (const char *string) 517{ 518 /* Parse the format string. */ 519 char *invalid_reason = NULL; 520 struct spec *descr = 521 (struct spec *) format_parse (string, false, &invalid_reason); 522 523 if (descr != NULL) 524 { 525 unsigned int result = descr->unnamed_arg_count; 526 527 format_free (descr); 528 return result; 529 } 530 else 531 { 532 free (invalid_reason); 533 return 0; 534 } 535} 536 537 538#ifdef TEST 539 540/* Test program: Print the argument list specification returned by 541 format_parse for strings read from standard input. */ 542 543#include <stdio.h> 544#include "getline.h" 545 546static void 547format_print (void *descr) 548{ 549 struct spec *spec = (struct spec *) descr; 550 unsigned int i; 551 552 if (spec == NULL) 553 { 554 printf ("INVALID"); 555 return; 556 } 557 558 if (spec->named_arg_count > 0) 559 { 560 if (spec->unnamed_arg_count > 0) 561 abort (); 562 563 printf ("{"); 564 for (i = 0; i < spec->named_arg_count; i++) 565 { 566 if (i > 0) 567 printf (", "); 568 printf ("'%s':", spec->named[i].name); 569 switch (spec->named[i].type) 570 { 571 case FAT_ANY: 572 printf ("*"); 573 break; 574 case FAT_CHARACTER: 575 printf ("c"); 576 break; 577 case FAT_STRING: 578 printf ("s"); 579 break; 580 case FAT_INTEGER: 581 printf ("i"); 582 break; 583 case FAT_FLOAT: 584 printf ("f"); 585 break; 586 default: 587 abort (); 588 } 589 } 590 printf ("}"); 591 } 592 else 593 { 594 printf ("("); 595 for (i = 0; i < spec->unnamed_arg_count; i++) 596 { 597 if (i > 0) 598 printf (" "); 599 switch (spec->unnamed[i].type) 600 { 601 case FAT_ANY: 602 printf ("*"); 603 break; 604 case FAT_CHARACTER: 605 printf ("c"); 606 break; 607 case FAT_STRING: 608 printf ("s"); 609 break; 610 case FAT_INTEGER: 611 printf ("i"); 612 break; 613 case FAT_FLOAT: 614 printf ("f"); 615 break; 616 default: 617 abort (); 618 } 619 } 620 printf (")"); 621 } 622} 623 624int 625main () 626{ 627 for (;;) 628 { 629 char *line = NULL; 630 size_t line_size = 0; 631 int line_len; 632 char *invalid_reason; 633 void *descr; 634 635 line_len = getline (&line, &line_size, stdin); 636 if (line_len < 0) 637 break; 638 if (line_len > 0 && line[line_len - 1] == '\n') 639 line[--line_len] = '\0'; 640 641 invalid_reason = NULL; 642 descr = format_parse (line, false, &invalid_reason); 643 644 format_print (descr); 645 printf ("\n"); 646 if (descr == NULL) 647 printf ("%s\n", invalid_reason); 648 649 free (invalid_reason); 650 free (line); 651 } 652 653 return 0; 654} 655 656/* 657 * For Emacs M-x compile 658 * Local Variables: 659 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../lib/libgettextlib.la" 660 * End: 661 */ 662 663#endif /* TEST */ 664