1/* awk format strings. 2 Copyright (C) 2001-2004 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2002. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software Foundation, 17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 18 19#ifdef HAVE_CONFIG_H 20# include <config.h> 21#endif 22 23#include <stdbool.h> 24#include <stdlib.h> 25 26#include "format.h" 27#include "c-ctype.h" 28#include "xalloc.h" 29#include "xerror.h" 30#include "format-invalid.h" 31#include "gettext.h" 32 33#define _(str) gettext (str) 34 35/* awk format strings are described in the gawk-3.1 documentation and 36 implemented in gawk-3.1.0/builtin.c: format_tree(). 37 A directive 38 - starts with '%' or '%m$' where m is a positive integer, 39 - is optionally followed by any of the characters '#', '0', '-', ' ', '+', 40 each of which acts as a flag, 41 - is optionally followed by a width specification: '*' (reads an argument) 42 or '*m$' or a nonempty digit sequence, 43 - is optionally followed by '.' and a precision specification: '*' (reads 44 an argument) or '*m$' or a nonempty digit sequence, 45 - is finished by a specifier 46 - '%', that needs no argument, 47 - 'c', that need a character argument, 48 - 's', that need a string argument, 49 - 'i', 'd', that need a signed integer argument, 50 - 'o', 'u', 'x', 'X', that need an unsigned integer argument, 51 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument. 52 Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot 53 be used in the same string. 54 */ 55 56enum format_arg_type 57{ 58 FAT_NONE, 59 FAT_CHARACTER, 60 FAT_STRING, 61 FAT_INTEGER, 62 FAT_UNSIGNED_INTEGER, 63 FAT_FLOAT 64}; 65 66struct numbered_arg 67{ 68 unsigned int number; 69 enum format_arg_type type; 70}; 71 72struct spec 73{ 74 unsigned int directives; 75 unsigned int numbered_arg_count; 76 unsigned int allocated; 77 struct numbered_arg *numbered; 78}; 79 80/* Locale independent test for a decimal digit. 81 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 82 <ctype.h> isdigit must be an 'unsigned char'.) */ 83#undef isdigit 84#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 85 86 87static int 88numbered_arg_compare (const void *p1, const void *p2) 89{ 90 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 91 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 92 93 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 94} 95 96static void * 97format_parse (const char *format, bool translated, char **invalid_reason) 98{ 99 struct spec spec; 100 unsigned int unnumbered_arg_count; 101 struct spec *result; 102 103 spec.directives = 0; 104 spec.numbered_arg_count = 0; 105 spec.allocated = 0; 106 spec.numbered = NULL; 107 unnumbered_arg_count = 0; 108 109 for (; *format != '\0';) 110 if (*format++ == '%') 111 { 112 /* A directive. */ 113 unsigned int number = 0; 114 enum format_arg_type type; 115 116 spec.directives++; 117 118 if (isdigit (*format)) 119 { 120 const char *f = format; 121 unsigned int m = 0; 122 123 do 124 { 125 m = 10 * m + (*f - '0'); 126 f++; 127 } 128 while (isdigit (*f)); 129 130 if (*f == '$') 131 { 132 if (m == 0) 133 { 134 *invalid_reason = INVALID_ARGNO_0 (spec.directives); 135 goto bad_format; 136 } 137 number = m; 138 format = ++f; 139 } 140 } 141 142 /* Parse flags. */ 143 while (*format == ' ' || *format == '+' || *format == '-' 144 || *format == '#' || *format == '0') 145 format++; 146 147 /* Parse width. */ 148 if (*format == '*') 149 { 150 unsigned int width_number = 0; 151 152 format++; 153 154 if (isdigit (*format)) 155 { 156 const char *f = format; 157 unsigned int m = 0; 158 159 do 160 { 161 m = 10 * m + (*f - '0'); 162 f++; 163 } 164 while (isdigit (*f)); 165 166 if (*f == '$') 167 { 168 if (m == 0) 169 { 170 *invalid_reason = 171 INVALID_WIDTH_ARGNO_0 (spec.directives); 172 goto bad_format; 173 } 174 width_number = m; 175 format = ++f; 176 } 177 } 178 179 if (width_number) 180 { 181 /* Numbered argument. */ 182 183 /* Numbered and unnumbered specifications are exclusive. */ 184 if (unnumbered_arg_count > 0) 185 { 186 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 187 goto bad_format; 188 } 189 190 if (spec.allocated == spec.numbered_arg_count) 191 { 192 spec.allocated = 2 * spec.allocated + 1; 193 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 194 } 195 spec.numbered[spec.numbered_arg_count].number = width_number; 196 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 197 spec.numbered_arg_count++; 198 } 199 else 200 { 201 /* Unnumbered argument. */ 202 203 /* Numbered and unnumbered specifications are exclusive. */ 204 if (spec.numbered_arg_count > 0) 205 { 206 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 207 goto bad_format; 208 } 209 210 if (spec.allocated == unnumbered_arg_count) 211 { 212 spec.allocated = 2 * spec.allocated + 1; 213 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 214 } 215 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 216 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; 217 unnumbered_arg_count++; 218 } 219 } 220 else if (isdigit (*format)) 221 { 222 do format++; while (isdigit (*format)); 223 } 224 225 /* Parse precision. */ 226 if (*format == '.') 227 { 228 format++; 229 230 if (*format == '*') 231 { 232 unsigned int precision_number = 0; 233 234 format++; 235 236 if (isdigit (*format)) 237 { 238 const char *f = format; 239 unsigned int m = 0; 240 241 do 242 { 243 m = 10 * m + (*f - '0'); 244 f++; 245 } 246 while (isdigit (*f)); 247 248 if (*f == '$') 249 { 250 if (m == 0) 251 { 252 *invalid_reason = 253 INVALID_PRECISION_ARGNO_0 (spec.directives); 254 goto bad_format; 255 } 256 precision_number = m; 257 format = ++f; 258 } 259 } 260 261 if (precision_number) 262 { 263 /* Numbered argument. */ 264 265 /* Numbered and unnumbered specifications are exclusive. */ 266 if (unnumbered_arg_count > 0) 267 { 268 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 269 goto bad_format; 270 } 271 272 if (spec.allocated == spec.numbered_arg_count) 273 { 274 spec.allocated = 2 * spec.allocated + 1; 275 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 276 } 277 spec.numbered[spec.numbered_arg_count].number = precision_number; 278 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 279 spec.numbered_arg_count++; 280 } 281 else 282 { 283 /* Unnumbered argument. */ 284 285 /* Numbered and unnumbered specifications are exclusive. */ 286 if (spec.numbered_arg_count > 0) 287 { 288 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 289 goto bad_format; 290 } 291 292 if (spec.allocated == unnumbered_arg_count) 293 { 294 spec.allocated = 2 * spec.allocated + 1; 295 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 296 } 297 spec.numbered[unnumbered_arg_count].type = unnumbered_arg_count + 1; 298 spec.numbered[unnumbered_arg_count].type = FAT_INTEGER; 299 unnumbered_arg_count++; 300 } 301 } 302 else if (isdigit (*format)) 303 { 304 do format++; while (isdigit (*format)); 305 } 306 } 307 308 switch (*format) 309 { 310 case '%': 311 type = FAT_NONE; 312 break; 313 case 'c': 314 type = FAT_CHARACTER; 315 break; 316 case 's': 317 type = FAT_STRING; 318 break; 319 case 'i': case 'd': 320 type = FAT_INTEGER; 321 break; 322 case 'u': case 'o': case 'x': case 'X': 323 type = FAT_UNSIGNED_INTEGER; 324 break; 325 case 'e': case 'E': case 'f': case 'g': case 'G': 326 type = FAT_FLOAT; 327 break; 328 default: 329 *invalid_reason = 330 (*format == '\0' 331 ? INVALID_UNTERMINATED_DIRECTIVE () 332 : INVALID_CONVERSION_SPECIFIER (spec.directives, *format)); 333 goto bad_format; 334 } 335 336 if (type != FAT_NONE) 337 { 338 if (number) 339 { 340 /* Numbered argument. */ 341 342 /* Numbered and unnumbered specifications are exclusive. */ 343 if (unnumbered_arg_count > 0) 344 { 345 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 346 goto bad_format; 347 } 348 349 if (spec.allocated == spec.numbered_arg_count) 350 { 351 spec.allocated = 2 * spec.allocated + 1; 352 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 353 } 354 spec.numbered[spec.numbered_arg_count].number = number; 355 spec.numbered[spec.numbered_arg_count].type = type; 356 spec.numbered_arg_count++; 357 } 358 else 359 { 360 /* Unnumbered argument. */ 361 362 /* Numbered and unnumbered specifications are exclusive. */ 363 if (spec.numbered_arg_count > 0) 364 { 365 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 366 goto bad_format; 367 } 368 369 if (spec.allocated == unnumbered_arg_count) 370 { 371 spec.allocated = 2 * spec.allocated + 1; 372 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 373 } 374 spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; 375 spec.numbered[unnumbered_arg_count].type = type; 376 unnumbered_arg_count++; 377 } 378 } 379 380 format++; 381 } 382 383 /* Convert the unnumbered argument array to numbered arguments. */ 384 if (unnumbered_arg_count > 0) 385 spec.numbered_arg_count = unnumbered_arg_count; 386 /* Sort the numbered argument array, and eliminate duplicates. */ 387 else if (spec.numbered_arg_count > 1) 388 { 389 unsigned int i, j; 390 bool err; 391 392 qsort (spec.numbered, spec.numbered_arg_count, 393 sizeof (struct numbered_arg), numbered_arg_compare); 394 395 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 396 err = false; 397 for (i = j = 0; i < spec.numbered_arg_count; i++) 398 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) 399 { 400 enum format_arg_type type1 = spec.numbered[i].type; 401 enum format_arg_type type2 = spec.numbered[j-1].type; 402 enum format_arg_type type_both; 403 404 if (type1 == type2) 405 type_both = type1; 406 else 407 { 408 /* Incompatible types. */ 409 type_both = FAT_NONE; 410 if (!err) 411 *invalid_reason = 412 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number); 413 err = true; 414 } 415 416 spec.numbered[j-1].type = type_both; 417 } 418 else 419 { 420 if (j < i) 421 { 422 spec.numbered[j].number = spec.numbered[i].number; 423 spec.numbered[j].type = spec.numbered[i].type; 424 } 425 j++; 426 } 427 spec.numbered_arg_count = j; 428 if (err) 429 /* *invalid_reason has already been set above. */ 430 goto bad_format; 431 } 432 433 result = (struct spec *) xmalloc (sizeof (struct spec)); 434 *result = spec; 435 return result; 436 437 bad_format: 438 if (spec.numbered != NULL) 439 free (spec.numbered); 440 return NULL; 441} 442 443static void 444format_free (void *descr) 445{ 446 struct spec *spec = (struct spec *) descr; 447 448 if (spec->numbered != NULL) 449 free (spec->numbered); 450 free (spec); 451} 452 453static int 454format_get_number_of_directives (void *descr) 455{ 456 struct spec *spec = (struct spec *) descr; 457 458 return spec->directives; 459} 460 461static bool 462format_check (void *msgid_descr, void *msgstr_descr, bool equality, 463 formatstring_error_logger_t error_logger, 464 const char *pretty_msgstr) 465{ 466 struct spec *spec1 = (struct spec *) msgid_descr; 467 struct spec *spec2 = (struct spec *) msgstr_descr; 468 bool err = false; 469 470 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 471 { 472 unsigned int i, j; 473 unsigned int n1 = spec1->numbered_arg_count; 474 unsigned int n2 = spec2->numbered_arg_count; 475 476 /* Check the argument names are the same. 477 Both arrays are sorted. We search for the first difference. */ 478 for (i = 0, j = 0; i < n1 || j < n2; ) 479 { 480 int cmp = (i >= n1 ? 1 : 481 j >= n2 ? -1 : 482 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 483 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 484 0); 485 486 if (cmp > 0) 487 { 488 if (error_logger) 489 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 490 spec2->numbered[j].number, pretty_msgstr); 491 err = true; 492 break; 493 } 494 else if (cmp < 0) 495 { 496 if (equality) 497 { 498 if (error_logger) 499 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 500 spec1->numbered[i].number, pretty_msgstr); 501 err = true; 502 break; 503 } 504 else 505 i++; 506 } 507 else 508 j++, i++; 509 } 510 /* Check the argument types are the same. */ 511 if (!err) 512 for (i = 0, j = 0; j < n2; ) 513 { 514 if (spec1->numbered[i].number == spec2->numbered[j].number) 515 { 516 if (spec1->numbered[i].type != spec2->numbered[j].type) 517 { 518 if (error_logger) 519 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 520 pretty_msgstr, spec2->numbered[j].number); 521 err = true; 522 break; 523 } 524 j++, i++; 525 } 526 else 527 i++; 528 } 529 } 530 531 return err; 532} 533 534 535struct formatstring_parser formatstring_awk = 536{ 537 format_parse, 538 format_free, 539 format_get_number_of_directives, 540 format_check 541}; 542 543 544#ifdef TEST 545 546/* Test program: Print the argument list specification returned by 547 format_parse for strings read from standard input. */ 548 549#include <stdio.h> 550#include "getline.h" 551 552static void 553format_print (void *descr) 554{ 555 struct spec *spec = (struct spec *) descr; 556 unsigned int last; 557 unsigned int i; 558 559 if (spec == NULL) 560 { 561 printf ("INVALID"); 562 return; 563 } 564 565 printf ("("); 566 last = 1; 567 for (i = 0; i < spec->numbered_arg_count; i++) 568 { 569 unsigned int number = spec->numbered[i].number; 570 571 if (i > 0) 572 printf (" "); 573 if (number < last) 574 abort (); 575 for (; last < number; last++) 576 printf ("_ "); 577 switch (spec->numbered[i].type) 578 { 579 case FAT_CHARACTER: 580 printf ("c"); 581 break; 582 case FAT_STRING: 583 printf ("s"); 584 break; 585 case FAT_INTEGER: 586 printf ("i"); 587 break; 588 case FAT_UNSIGNED_INTEGER: 589 printf ("[unsigned]i"); 590 break; 591 case FAT_FLOAT: 592 printf ("f"); 593 break; 594 default: 595 abort (); 596 } 597 last = number + 1; 598 } 599 printf (")"); 600} 601 602int 603main () 604{ 605 for (;;) 606 { 607 char *line = NULL; 608 size_t line_size = 0; 609 int line_len; 610 char *invalid_reason; 611 void *descr; 612 613 line_len = getline (&line, &line_size, stdin); 614 if (line_len < 0) 615 break; 616 if (line_len > 0 && line[line_len - 1] == '\n') 617 line[--line_len] = '\0'; 618 619 invalid_reason = NULL; 620 descr = format_parse (line, false, &invalid_reason); 621 622 format_print (descr); 623 printf ("\n"); 624 if (descr == NULL) 625 printf ("%s\n", invalid_reason); 626 627 free (invalid_reason); 628 free (line); 629 } 630 631 return 0; 632} 633 634/* 635 * For Emacs M-x compile 636 * Local Variables: 637 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-awk.c ../lib/libgettextlib.la" 638 * End: 639 */ 640 641#endif /* TEST */ 642