1/* Tcl format strings. 2 Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <haible@clisp.cons.org>, 2002. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22#include <stdbool.h> 23#include <stdlib.h> 24 25#include "format.h" 26#include "c-ctype.h" 27#include "xalloc.h" 28#include "xvasprintf.h" 29#include "format-invalid.h" 30#include "gettext.h" 31 32#define _(str) gettext (str) 33 34/* Tcl format strings are described in the tcl8.3.3/doc/format.n manual 35 page and implemented in the function Tcl_FormatObjCmd in 36 tcl8.3.3/generic/tclCmdAH.c. 37 A directive 38 - starts with '%' or '%m$' where m is a positive integer, 39 - is optionally followed by any of the characters '#', '0', '-', ' ', '+', 40 each of which acts as a flag, 41 - is optionally followed by a width specification: '*' (reads an argument) 42 or a nonempty digit sequence, 43 - is optionally followed by '.' and a precision specification: '*' (reads 44 an argument) or a nonempty digit sequence, 45 - is optionally followed by a size specifier, 'h' or 'l'. 'l' is ignored. 46 - is finished by a specifier 47 - '%', that needs no argument, 48 - 'c', that needs a character argument, 49 - 's', that needs a string argument, 50 - 'i', 'd', that need a signed integer argument, 51 - 'o', 'u', 'x', 'X', that need an unsigned integer argument, 52 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument. 53 Numbered ('%m$') and unnumbered argument specifications cannot be used 54 in the same string. 55 */ 56 57enum format_arg_type 58{ 59 FAT_NONE, 60 FAT_CHARACTER, 61 FAT_STRING, 62 FAT_INTEGER, 63 FAT_UNSIGNED_INTEGER, 64 FAT_SHORT_INTEGER, 65 FAT_SHORT_UNSIGNED_INTEGER, 66 FAT_FLOAT 67}; 68 69struct numbered_arg 70{ 71 unsigned int number; 72 enum format_arg_type type; 73}; 74 75struct spec 76{ 77 unsigned int directives; 78 unsigned int numbered_arg_count; 79 unsigned int allocated; 80 struct numbered_arg *numbered; 81}; 82 83/* Locale independent test for a decimal digit. 84 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 85 <ctype.h> isdigit must be an 'unsigned char'.) */ 86#undef isdigit 87#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 88 89 90static int 91numbered_arg_compare (const void *p1, const void *p2) 92{ 93 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 94 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 95 96 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 97} 98 99static void * 100format_parse (const char *format, bool translated, char *fdi, 101 char **invalid_reason) 102{ 103 const char *const format_start = format; 104 struct spec spec; 105 struct spec *result; 106 bool seen_numbered_arg; 107 bool seen_unnumbered_arg; 108 unsigned int number; 109 110 spec.directives = 0; 111 spec.numbered_arg_count = 0; 112 spec.allocated = 0; 113 spec.numbered = NULL; 114 seen_numbered_arg = false; 115 seen_unnumbered_arg = false; 116 number = 1; 117 118 for (; *format != '\0';) 119 if (*format++ == '%') 120 { 121 /* A directive. */ 122 FDI_SET (format - 1, FMTDIR_START); 123 spec.directives++; 124 125 if (*format != '%') 126 { 127 bool is_numbered_arg; 128 bool short_flag; 129 enum format_arg_type type; 130 131 is_numbered_arg = false; 132 if (isdigit (*format)) 133 { 134 const char *f = format; 135 unsigned int m = 0; 136 137 do 138 { 139 m = 10 * m + (*f - '0'); 140 f++; 141 } 142 while (isdigit (*f)); 143 144 if (*f == '$') 145 { 146 if (m == 0) 147 { 148 *invalid_reason = INVALID_ARGNO_0 (spec.directives); 149 FDI_SET (f, FMTDIR_ERROR); 150 goto bad_format; 151 } 152 number = m; 153 format = ++f; 154 155 /* Numbered and unnumbered specifications are exclusive. */ 156 if (seen_unnumbered_arg) 157 { 158 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 159 FDI_SET (format - 1, FMTDIR_ERROR); 160 goto bad_format; 161 } 162 is_numbered_arg = true; 163 seen_numbered_arg = true; 164 } 165 } 166 167 /* Numbered and unnumbered specifications are exclusive. */ 168 if (!is_numbered_arg) 169 { 170 if (seen_numbered_arg) 171 { 172 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); 173 FDI_SET (format - 1, FMTDIR_ERROR); 174 goto bad_format; 175 } 176 seen_unnumbered_arg = true; 177 } 178 179 /* Parse flags. */ 180 while (*format == ' ' || *format == '+' || *format == '-' 181 || *format == '#' || *format == '0') 182 format++; 183 184 /* Parse width. */ 185 if (*format == '*') 186 { 187 format++; 188 189 if (spec.allocated == spec.numbered_arg_count) 190 { 191 spec.allocated = 2 * spec.allocated + 1; 192 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 193 } 194 spec.numbered[spec.numbered_arg_count].number = number; 195 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 196 spec.numbered_arg_count++; 197 198 number++; 199 } 200 else if (isdigit (*format)) 201 { 202 do format++; while (isdigit (*format)); 203 } 204 205 /* Parse precision. */ 206 if (*format == '.') 207 { 208 format++; 209 210 if (*format == '*') 211 { 212 format++; 213 214 if (spec.allocated == spec.numbered_arg_count) 215 { 216 spec.allocated = 2 * spec.allocated + 1; 217 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 218 } 219 spec.numbered[spec.numbered_arg_count].number = number; 220 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; 221 spec.numbered_arg_count++; 222 223 number++; 224 } 225 else if (isdigit (*format)) 226 { 227 do format++; while (isdigit (*format)); 228 } 229 } 230 231 /* Parse optional size specification. */ 232 short_flag = false; 233 if (*format == 'h') 234 short_flag = true, format++; 235 else if (*format == 'l') 236 format++; 237 238 switch (*format) 239 { 240 case 'c': 241 type = FAT_CHARACTER; 242 break; 243 case 's': 244 type = FAT_STRING; 245 break; 246 case 'i': case 'd': 247 type = (short_flag ? FAT_SHORT_INTEGER : FAT_INTEGER); 248 break; 249 case 'u': case 'o': case 'x': case 'X': 250 type = (short_flag ? FAT_SHORT_UNSIGNED_INTEGER : FAT_UNSIGNED_INTEGER); 251 break; 252 case 'e': case 'E': case 'f': case 'g': case 'G': 253 type = FAT_FLOAT; 254 break; 255 default: 256 if (*format == '\0') 257 { 258 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 259 FDI_SET (format - 1, FMTDIR_ERROR); 260 } 261 else 262 { 263 *invalid_reason = 264 INVALID_CONVERSION_SPECIFIER (spec.directives, *format); 265 FDI_SET (format, FMTDIR_ERROR); 266 } 267 goto bad_format; 268 } 269 270 if (spec.allocated == spec.numbered_arg_count) 271 { 272 spec.allocated = 2 * spec.allocated + 1; 273 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); 274 } 275 spec.numbered[spec.numbered_arg_count].number = number; 276 spec.numbered[spec.numbered_arg_count].type = type; 277 spec.numbered_arg_count++; 278 279 number++; 280 } 281 282 FDI_SET (format, FMTDIR_END); 283 284 format++; 285 } 286 287 /* Sort the numbered argument array, and eliminate duplicates. */ 288 if (spec.numbered_arg_count > 1) 289 { 290 unsigned int i, j; 291 bool err; 292 293 qsort (spec.numbered, spec.numbered_arg_count, 294 sizeof (struct numbered_arg), numbered_arg_compare); 295 296 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 297 err = false; 298 for (i = j = 0; i < spec.numbered_arg_count; i++) 299 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) 300 { 301 enum format_arg_type type1 = spec.numbered[i].type; 302 enum format_arg_type type2 = spec.numbered[j-1].type; 303 enum format_arg_type type_both; 304 305 if (type1 == type2) 306 type_both = type1; 307 else 308 { 309 /* Incompatible types. */ 310 type_both = FAT_NONE; 311 if (!err) 312 *invalid_reason = 313 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number); 314 err = true; 315 } 316 317 spec.numbered[j-1].type = type_both; 318 } 319 else 320 { 321 if (j < i) 322 { 323 spec.numbered[j].number = spec.numbered[i].number; 324 spec.numbered[j].type = spec.numbered[i].type; 325 } 326 j++; 327 } 328 spec.numbered_arg_count = j; 329 if (err) 330 /* *invalid_reason has already been set above. */ 331 goto bad_format; 332 } 333 334 result = XMALLOC (struct spec); 335 *result = spec; 336 return result; 337 338 bad_format: 339 if (spec.numbered != NULL) 340 free (spec.numbered); 341 return NULL; 342} 343 344static void 345format_free (void *descr) 346{ 347 struct spec *spec = (struct spec *) descr; 348 349 if (spec->numbered != NULL) 350 free (spec->numbered); 351 free (spec); 352} 353 354static int 355format_get_number_of_directives (void *descr) 356{ 357 struct spec *spec = (struct spec *) descr; 358 359 return spec->directives; 360} 361 362static bool 363format_check (void *msgid_descr, void *msgstr_descr, bool equality, 364 formatstring_error_logger_t error_logger, 365 const char *pretty_msgstr) 366{ 367 struct spec *spec1 = (struct spec *) msgid_descr; 368 struct spec *spec2 = (struct spec *) msgstr_descr; 369 bool err = false; 370 371 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 372 { 373 unsigned int i, j; 374 unsigned int n1 = spec1->numbered_arg_count; 375 unsigned int n2 = spec2->numbered_arg_count; 376 377 /* Check the argument names are the same. 378 Both arrays are sorted. We search for the first difference. */ 379 for (i = 0, j = 0; i < n1 || j < n2; ) 380 { 381 int cmp = (i >= n1 ? 1 : 382 j >= n2 ? -1 : 383 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 384 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 385 0); 386 387 if (cmp > 0) 388 { 389 if (error_logger) 390 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 391 spec2->numbered[j].number, pretty_msgstr); 392 err = true; 393 break; 394 } 395 else if (cmp < 0) 396 { 397 if (equality) 398 { 399 if (error_logger) 400 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 401 spec1->numbered[i].number, pretty_msgstr); 402 err = true; 403 break; 404 } 405 else 406 i++; 407 } 408 else 409 j++, i++; 410 } 411 /* Check the argument types are the same. */ 412 if (!err) 413 for (i = 0, j = 0; j < n2; ) 414 { 415 if (spec1->numbered[i].number == spec2->numbered[j].number) 416 { 417 if (spec1->numbered[i].type != spec2->numbered[j].type) 418 { 419 if (error_logger) 420 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 421 pretty_msgstr, spec2->numbered[j].number); 422 err = true; 423 break; 424 } 425 j++, i++; 426 } 427 else 428 i++; 429 } 430 } 431 432 return err; 433} 434 435 436struct formatstring_parser formatstring_tcl = 437{ 438 format_parse, 439 format_free, 440 format_get_number_of_directives, 441 NULL, 442 format_check 443}; 444 445 446#ifdef TEST 447 448/* Test program: Print the argument list specification returned by 449 format_parse for strings read from standard input. */ 450 451#include <stdio.h> 452 453static void 454format_print (void *descr) 455{ 456 struct spec *spec = (struct spec *) descr; 457 unsigned int last; 458 unsigned int i; 459 460 if (spec == NULL) 461 { 462 printf ("INVALID"); 463 return; 464 } 465 466 printf ("("); 467 last = 1; 468 for (i = 0; i < spec->numbered_arg_count; i++) 469 { 470 unsigned int number = spec->numbered[i].number; 471 472 if (i > 0) 473 printf (" "); 474 if (number < last) 475 abort (); 476 for (; last < number; last++) 477 printf ("_ "); 478 switch (spec->numbered[i].type) 479 { 480 case FAT_CHARACTER: 481 printf ("c"); 482 break; 483 case FAT_STRING: 484 printf ("s"); 485 break; 486 case FAT_INTEGER: 487 printf ("i"); 488 break; 489 case FAT_UNSIGNED_INTEGER: 490 printf ("[unsigned]i"); 491 break; 492 case FAT_SHORT_INTEGER: 493 printf ("hi"); 494 break; 495 case FAT_SHORT_UNSIGNED_INTEGER: 496 printf ("[unsigned]hi"); 497 break; 498 case FAT_FLOAT: 499 printf ("f"); 500 break; 501 default: 502 abort (); 503 } 504 last = number + 1; 505 } 506 printf (")"); 507} 508 509int 510main () 511{ 512 for (;;) 513 { 514 char *line = NULL; 515 size_t line_size = 0; 516 int line_len; 517 char *invalid_reason; 518 void *descr; 519 520 line_len = getline (&line, &line_size, stdin); 521 if (line_len < 0) 522 break; 523 if (line_len > 0 && line[line_len - 1] == '\n') 524 line[--line_len] = '\0'; 525 526 invalid_reason = NULL; 527 descr = format_parse (line, false, NULL, &invalid_reason); 528 529 format_print (descr); 530 printf ("\n"); 531 if (descr == NULL) 532 printf ("%s\n", invalid_reason); 533 534 free (invalid_reason); 535 free (line); 536 } 537 538 return 0; 539} 540 541/* 542 * For Emacs M-x compile 543 * Local Variables: 544 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-tcl.c ../gnulib-lib/libgettextlib.la" 545 * End: 546 */ 547 548#endif /* TEST */ 549