1/* awk format strings. 2 Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2002. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22#include <stdbool.h> 23#include <stdlib.h> 24 25#include "format.h" 26#include "c-ctype.h" 27#include "xalloc.h" 28#include "xvasprintf.h" 29#include "format-invalid.h" 30#include "gettext.h" 31 32#define _(str) gettext (str) 33 34/* awk format strings are described in the gawk-3.1 documentation and 35 implemented in gawk-3.1.0/builtin.c: format_tree(). 36 A directive 37 - starts with '%' or '%m$' where m is a positive integer, 38 - is optionally followed by any of the characters '#', '0', '-', ' ', '+', 39 each of which acts as a flag, 40 - is optionally followed by a width specification: '*' (reads an argument) 41 or '*m$' or a nonempty digit sequence, 42 - is optionally followed by '.' and a precision specification: '*' (reads 43 an argument) or '*m$' or a nonempty digit sequence, 44 - is finished by a specifier 45 - '%', that needs no argument, 46 - 'c', that need a character argument, 47 - 's', that need a string argument, 48 - 'i', 'd', that need a signed integer argument, 49 - 'o', 'u', 'x', 'X', that need an unsigned integer argument, 50 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument. 51 Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot 52 be used in the same string. 53 */ 54 55enum format_arg_type 56{ 57 FAT_NONE, 58 FAT_CHARACTER, 59 FAT_STRING, 60 FAT_INTEGER, 61 FAT_UNSIGNED_INTEGER, 62 FAT_FLOAT 63}; 64 65struct numbered_arg 66{ 67 unsigned int number; 68 enum format_arg_type type; 69}; 70 71struct spec 72{ 73 unsigned int directives; 74 unsigned int numbered_arg_count; 75 unsigned int allocated; 76 struct numbered_arg *numbered; 77}; 78 79/* Locale independent test for a decimal digit. 80 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 81 <ctype.h> isdigit must be an 'unsigned char'.) */ 82#undef isdigit 83#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 84 85 86static int 87numbered_arg_compare (const void *p1, const void *p2) 88{ 89 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 90 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 91 92 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 93} 94 95static void * 96format_parse (const char *format, bool translated, char *fdi, 97 char **invalid_reason) 98{ 99 const char *const format_start = format; 100 struct spec spec; 101 unsigned int unnumbered_arg_count; 102 struct spec *result; 103 104 spec.directives = 0; 105 spec.numbered_arg_count = 0; 106 spec.allocated = 0; 107 spec.numbered = NULL; 108 unnumbered_arg_count = 0; 109 110 for (; *format != '\0';) 111 if (*format++ == '%') 112 { 113 /* A directive. */ 114 unsigned int number = 0; 115 enum format_arg_type type; 116 117 FDI_SET (format - 1, FMTDIR_START); 118 spec.directives++; 119 120 if (isdigit (*format)) 121 { 122 const char *f = format; 123 unsigned int m = 0; 124 125 do 126 { 127 m = 10 * m + (*f - '0'); 128 f++; 129 } 130 while (isdigit (*f)); 131 132 if (*f == '$') 133 { 134 if (m == 0) 135 { 136 *invalid_reason = INVALID_ARGNO_0 (spec.directives); 137 FDI_SET (f, FMTDIR_ERROR); 138 goto bad_format; 139 } 140 number = m; 141 format = ++f; 142 } 143 } 144 145 /* Parse flags. */ 146 while (*format == ' ' || *format == '+' || *format == '-' 147 || *format == '#' || *format == '0') 148 format++; 149 150 /* Parse width. */ 151 if (*format == '*') 152 { 153 unsigned int width_number = 0; 154 155 format++; 156 157 if (isdigit (*format)) 158 { 159 const char *f = format; 160 unsigned int m = 0; 161 162 do 163 { 164 m = 10 * m + (*f - '0'); 165 f++; 166 } 167 while (isdigit (*f)); 168 169 if (*f == '$') 170 { 171 if (m == 0) 172 { 173 *invalid_reason = 174 INVALID_WIDTH_ARGNO_0 (spec.directives); 175 FDI_SET (f, FMTDIR_ERROR); 176 goto bad_format; 177 } 178 width_number = m; 179 format = ++f; 180 } 181 } 182 183 if (width_number) 184 { 185 /* Numbered argument. */ 186 187 /* Numbered and unnumbered specifications are exclusive. */ 188 if (unnumbered_arg_count > 0) 189 { 190 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 191 FDI_SET (format - 1, FMTDIR_ERROR); 192 goto bad_format; 193 } 194 195 if (spec.allocated == spec.numbered_arg_count) 196 { 197 spec.allocated = 2 * spec.allocated + 1; 198 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 199 } 200 spec.numbered[spec.numbered_arg_count].number = width_number; 201 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 202 spec.numbered_arg_count++; 203 } 204 else 205 { 206 /* Unnumbered argument. */ 207 208 /* Numbered and unnumbered specifications are exclusive. */ 209 if (spec.numbered_arg_count > 0) 210 { 211 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 212 FDI_SET (format - 1, FMTDIR_ERROR); 213 goto bad_format; 214 } 215 216 if (spec.allocated == unnumbered_arg_count) 217 { 218 spec.allocated = 2 * spec.allocated + 1; 219 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 220 } 221 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 222 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; 223 unnumbered_arg_count++; 224 } 225 } 226 else if (isdigit (*format)) 227 { 228 do format++; while (isdigit (*format)); 229 } 230 231 /* Parse precision. */ 232 if (*format == '.') 233 { 234 format++; 235 236 if (*format == '*') 237 { 238 unsigned int precision_number = 0; 239 240 format++; 241 242 if (isdigit (*format)) 243 { 244 const char *f = format; 245 unsigned int m = 0; 246 247 do 248 { 249 m = 10 * m + (*f - '0'); 250 f++; 251 } 252 while (isdigit (*f)); 253 254 if (*f == '$') 255 { 256 if (m == 0) 257 { 258 *invalid_reason = 259 INVALID_PRECISION_ARGNO_0 (spec.directives); 260 FDI_SET (f, FMTDIR_ERROR); 261 goto bad_format; 262 } 263 precision_number = m; 264 format = ++f; 265 } 266 } 267 268 if (precision_number) 269 { 270 /* Numbered argument. */ 271 272 /* Numbered and unnumbered specifications are exclusive. */ 273 if (unnumbered_arg_count > 0) 274 { 275 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 276 FDI_SET (format - 1, FMTDIR_ERROR); 277 goto bad_format; 278 } 279 280 if (spec.allocated == spec.numbered_arg_count) 281 { 282 spec.allocated = 2 * spec.allocated + 1; 283 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 284 } 285 spec.numbered[spec.numbered_arg_count].number = precision_number; 286 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 287 spec.numbered_arg_count++; 288 } 289 else 290 { 291 /* Unnumbered argument. */ 292 293 /* Numbered and unnumbered specifications are exclusive. */ 294 if (spec.numbered_arg_count > 0) 295 { 296 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 297 FDI_SET (format - 1, FMTDIR_ERROR); 298 goto bad_format; 299 } 300 301 if (spec.allocated == unnumbered_arg_count) 302 { 303 spec.allocated = 2 * spec.allocated + 1; 304 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 305 } 306 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 307 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; 308 unnumbered_arg_count++; 309 } 310 } 311 else if (isdigit (*format)) 312 { 313 do format++; while (isdigit (*format)); 314 } 315 } 316 317 switch (*format) 318 { 319 case '%': 320 type = FAT_NONE; 321 break; 322 case 'c': 323 type = FAT_CHARACTER; 324 break; 325 case 's': 326 type = FAT_STRING; 327 break; 328 case 'i': case 'd': 329 type = FAT_INTEGER; 330 break; 331 case 'u': case 'o': case 'x': case 'X': 332 type = FAT_UNSIGNED_INTEGER; 333 break; 334 case 'e': case 'E': case 'f': case 'g': case 'G': 335 type = FAT_FLOAT; 336 break; 337 default: 338 if (*format == '\0') 339 { 340 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 341 FDI_SET (format - 1, FMTDIR_ERROR); 342 } 343 else 344 { 345 *invalid_reason = 346 INVALID_CONVERSION_SPECIFIER (spec.directives, *format); 347 FDI_SET (format, FMTDIR_ERROR); 348 } 349 goto bad_format; 350 } 351 352 if (type != FAT_NONE) 353 { 354 if (number) 355 { 356 /* Numbered argument. */ 357 358 /* Numbered and unnumbered specifications are exclusive. */ 359 if (unnumbered_arg_count > 0) 360 { 361 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 362 FDI_SET (format, FMTDIR_ERROR); 363 goto bad_format; 364 } 365 366 if (spec.allocated == spec.numbered_arg_count) 367 { 368 spec.allocated = 2 * spec.allocated + 1; 369 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 370 } 371 spec.numbered[spec.numbered_arg_count].number = number; 372 spec.numbered[spec.numbered_arg_count].type = type; 373 spec.numbered_arg_count++; 374 } 375 else 376 { 377 /* Unnumbered argument. */ 378 379 /* Numbered and unnumbered specifications are exclusive. */ 380 if (spec.numbered_arg_count > 0) 381 { 382 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 383 FDI_SET (format, FMTDIR_ERROR); 384 goto bad_format; 385 } 386 387 if (spec.allocated == unnumbered_arg_count) 388 { 389 spec.allocated = 2 * spec.allocated + 1; 390 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 391 } 392 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 393 spec.numbered[unnumbered_arg_count].type = type; 394 unnumbered_arg_count++; 395 } 396 } 397 398 FDI_SET (format, FMTDIR_END); 399 400 format++; 401 } 402 403 /* Convert the unnumbered argument array to numbered arguments. */ 404 if (unnumbered_arg_count > 0) 405 spec.numbered_arg_count = unnumbered_arg_count; 406 /* Sort the numbered argument array, and eliminate duplicates. */ 407 else if (spec.numbered_arg_count > 1) 408 { 409 unsigned int i, j; 410 bool err; 411 412 qsort (spec.numbered, spec.numbered_arg_count, 413 sizeof (struct numbered_arg), numbered_arg_compare); 414 415 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 416 err = false; 417 for (i = j = 0; i < spec.numbered_arg_count; i++) 418 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) 419 { 420 enum format_arg_type type1 = spec.numbered[i].type; 421 enum format_arg_type type2 = spec.numbered[j-1].type; 422 enum format_arg_type type_both; 423 424 if (type1 == type2) 425 type_both = type1; 426 else 427 { 428 /* Incompatible types. */ 429 type_both = FAT_NONE; 430 if (!err) 431 *invalid_reason = 432 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number); 433 err = true; 434 } 435 436 spec.numbered[j-1].type = type_both; 437 } 438 else 439 { 440 if (j < i) 441 { 442 spec.numbered[j].number = spec.numbered[i].number; 443 spec.numbered[j].type = spec.numbered[i].type; 444 } 445 j++; 446 } 447 spec.numbered_arg_count = j; 448 if (err) 449 /* *invalid_reason has already been set above. */ 450 goto bad_format; 451 } 452 453 result = XMALLOC (struct spec); 454 *result = spec; 455 return result; 456 457 bad_format: 458 if (spec.numbered != NULL) 459 free (spec.numbered); 460 return NULL; 461} 462 463static void 464format_free (void *descr) 465{ 466 struct spec *spec = (struct spec *) descr; 467 468 if (spec->numbered != NULL) 469 free (spec->numbered); 470 free (spec); 471} 472 473static int 474format_get_number_of_directives (void *descr) 475{ 476 struct spec *spec = (struct spec *) descr; 477 478 return spec->directives; 479} 480 481static bool 482format_check (void *msgid_descr, void *msgstr_descr, bool equality, 483 formatstring_error_logger_t error_logger, 484 const char *pretty_msgstr) 485{ 486 struct spec *spec1 = (struct spec *) msgid_descr; 487 struct spec *spec2 = (struct spec *) msgstr_descr; 488 bool err = false; 489 490 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 491 { 492 unsigned int i, j; 493 unsigned int n1 = spec1->numbered_arg_count; 494 unsigned int n2 = spec2->numbered_arg_count; 495 496 /* Check the argument names are the same. 497 Both arrays are sorted. We search for the first difference. */ 498 for (i = 0, j = 0; i < n1 || j < n2; ) 499 { 500 int cmp = (i >= n1 ? 1 : 501 j >= n2 ? -1 : 502 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 503 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 504 0); 505 506 if (cmp > 0) 507 { 508 if (error_logger) 509 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 510 spec2->numbered[j].number, pretty_msgstr); 511 err = true; 512 break; 513 } 514 else if (cmp < 0) 515 { 516 if (equality) 517 { 518 if (error_logger) 519 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 520 spec1->numbered[i].number, pretty_msgstr); 521 err = true; 522 break; 523 } 524 else 525 i++; 526 } 527 else 528 j++, i++; 529 } 530 /* Check the argument types are the same. */ 531 if (!err) 532 for (i = 0, j = 0; j < n2; ) 533 { 534 if (spec1->numbered[i].number == spec2->numbered[j].number) 535 { 536 if (spec1->numbered[i].type != spec2->numbered[j].type) 537 { 538 if (error_logger) 539 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 540 pretty_msgstr, spec2->numbered[j].number); 541 err = true; 542 break; 543 } 544 j++, i++; 545 } 546 else 547 i++; 548 } 549 } 550 551 return err; 552} 553 554 555struct formatstring_parser formatstring_awk = 556{ 557 format_parse, 558 format_free, 559 format_get_number_of_directives, 560 NULL, 561 format_check 562}; 563 564 565#ifdef TEST 566 567/* Test program: Print the argument list specification returned by 568 format_parse for strings read from standard input. */ 569 570#include <stdio.h> 571 572static void 573format_print (void *descr) 574{ 575 struct spec *spec = (struct spec *) descr; 576 unsigned int last; 577 unsigned int i; 578 579 if (spec == NULL) 580 { 581 printf ("INVALID"); 582 return; 583 } 584 585 printf ("("); 586 last = 1; 587 for (i = 0; i < spec->numbered_arg_count; i++) 588 { 589 unsigned int number = spec->numbered[i].number; 590 591 if (i > 0) 592 printf (" "); 593 if (number < last) 594 abort (); 595 for (; last < number; last++) 596 printf ("_ "); 597 switch (spec->numbered[i].type) 598 { 599 case FAT_CHARACTER: 600 printf ("c"); 601 break; 602 case FAT_STRING: 603 printf ("s"); 604 break; 605 case FAT_INTEGER: 606 printf ("i"); 607 break; 608 case FAT_UNSIGNED_INTEGER: 609 printf ("[unsigned]i"); 610 break; 611 case FAT_FLOAT: 612 printf ("f"); 613 break; 614 default: 615 abort (); 616 } 617 last = number + 1; 618 } 619 printf (")"); 620} 621 622int 623main () 624{ 625 for (;;) 626 { 627 char *line = NULL; 628 size_t line_size = 0; 629 int line_len; 630 char *invalid_reason; 631 void *descr; 632 633 line_len = getline (&line, &line_size, stdin); 634 if (line_len < 0) 635 break; 636 if (line_len > 0 && line[line_len - 1] == '\n') 637 line[--line_len] = '\0'; 638 639 invalid_reason = NULL; 640 descr = format_parse (line, false, NULL, &invalid_reason); 641 642 format_print (descr); 643 printf ("\n"); 644 if (descr == NULL) 645 printf ("%s\n", invalid_reason); 646 647 free (invalid_reason); 648 free (line); 649 } 650 651 return 0; 652} 653 654/* 655 * For Emacs M-x compile 656 * Local Variables: 657 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-awk.c ../gnulib-lib/libgettextlib.la" 658 * End: 659 */ 660 661#endif /* TEST */ 662