1/* PHP format strings. 2 Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2002. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22#include <stdbool.h> 23#include <stdlib.h> 24 25#include "format.h" 26#include "c-ctype.h" 27#include "xalloc.h" 28#include "xvasprintf.h" 29#include "format-invalid.h" 30#include "gettext.h" 31 32#define _(str) gettext (str) 33 34/* PHP format strings are described in phpdoc-4.0.6, file 35 phpdoc/manual/function.sprintf.html, and are implemented in 36 php-4.1.0/ext/standard/formatted_print.c. 37 A directive 38 - starts with '%' or '%m$' where m is a positive integer, 39 - is optionally followed by any of the characters '0', '-', ' ', or 40 "'<anychar>", each of which acts as a flag, 41 - is optionally followed by a width specification: a nonempty digit 42 sequence, 43 - is optionally followed by '.' and a precision specification: a nonempty 44 digit sequence, 45 - is optionally followed by a size specifier 'l', which is ignored, 46 - is finished by a specifier 47 - 's', that needs a string argument, 48 - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument, 49 - 'e', 'f', that need a floating-point argument, 50 - 'c', that needs a character argument. 51 Additionally there is the directive '%%', which takes no argument. 52 Numbered and unnumbered argument specifications can be used in the same 53 string. Numbered argument specifications have no influence on the 54 "current argument index", that is incremented each time an argument is read. 55 */ 56 57enum format_arg_type 58{ 59 FAT_INTEGER, 60 FAT_FLOAT, 61 FAT_CHARACTER, 62 FAT_STRING 63}; 64 65struct numbered_arg 66{ 67 unsigned int number; 68 enum format_arg_type type; 69}; 70 71struct spec 72{ 73 unsigned int directives; 74 unsigned int numbered_arg_count; 75 unsigned int allocated; 76 struct numbered_arg *numbered; 77}; 78 79/* Locale independent test for a decimal digit. 80 Argument can be 'char' or 'unsigned char'. (Whereas the argument of 81 <ctype.h> isdigit must be an 'unsigned char'.) */ 82#undef isdigit 83#define isdigit(c) ((unsigned int) ((c) - '0') < 10) 84 85 86static int 87numbered_arg_compare (const void *p1, const void *p2) 88{ 89 unsigned int n1 = ((const struct numbered_arg *) p1)->number; 90 unsigned int n2 = ((const struct numbered_arg *) p2)->number; 91 92 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); 93} 94 95static void * 96format_parse (const char *format, bool translated, char *fdi, 97 char **invalid_reason) 98{ 99 const char *const format_start = format; 100 unsigned int directives; 101 unsigned int numbered_arg_count; 102 unsigned int allocated; 103 struct numbered_arg *numbered; 104 unsigned int unnumbered_arg_count; 105 struct spec *result; 106 107 directives = 0; 108 numbered_arg_count = 0; 109 allocated = 0; 110 numbered = NULL; 111 unnumbered_arg_count = 0; 112 113 for (; *format != '\0';) 114 if (*format++ == '%') 115 { 116 /* A directive. */ 117 FDI_SET (format - 1, FMTDIR_START); 118 directives++; 119 120 if (*format != '%') 121 { 122 /* A complex directive. */ 123 unsigned int number; 124 enum format_arg_type type; 125 126 number = ++unnumbered_arg_count; 127 if (isdigit (*format)) 128 { 129 const char *f = format; 130 unsigned int m = 0; 131 132 do 133 { 134 m = 10 * m + (*f - '0'); 135 f++; 136 } 137 while (isdigit (*f)); 138 139 if (*f == '$') 140 { 141 if (m == 0) 142 { 143 *invalid_reason = INVALID_ARGNO_0 (directives); 144 FDI_SET (f, FMTDIR_ERROR); 145 goto bad_format; 146 } 147 number = m; 148 format = ++f; 149 --unnumbered_arg_count; 150 } 151 } 152 153 /* Parse flags. */ 154 for (;;) 155 { 156 if (*format == '0' || *format == '-' || *format == ' ') 157 format++; 158 else if (*format == '\'') 159 { 160 format++; 161 if (*format == '\0') 162 { 163 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 164 FDI_SET (format - 1, FMTDIR_ERROR); 165 goto bad_format; 166 } 167 format++; 168 } 169 else 170 break; 171 } 172 173 /* Parse width. */ 174 if (isdigit (*format)) 175 { 176 do 177 format++; 178 while (isdigit (*format)); 179 } 180 181 /* Parse precision. */ 182 if (*format == '.') 183 { 184 format++; 185 186 if (isdigit (*format)) 187 { 188 do 189 format++; 190 while (isdigit (*format)); 191 } 192 else 193 --format; /* will jump to bad_format */ 194 } 195 196 /* Parse size. */ 197 if (*format == 'l') 198 format++; 199 200 switch (*format) 201 { 202 case 'b': case 'd': case 'u': case 'o': case 'x': case 'X': 203 type = FAT_INTEGER; 204 break; 205 case 'e': case 'f': 206 type = FAT_FLOAT; 207 break; 208 case 'c': 209 type = FAT_CHARACTER; 210 break; 211 case 's': 212 type = FAT_STRING; 213 break; 214 default: 215 if (*format == '\0') 216 { 217 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); 218 FDI_SET (format - 1, FMTDIR_ERROR); 219 } 220 else 221 { 222 *invalid_reason = 223 INVALID_CONVERSION_SPECIFIER (directives, *format); 224 FDI_SET (format, FMTDIR_ERROR); 225 } 226 goto bad_format; 227 } 228 229 if (allocated == numbered_arg_count) 230 { 231 allocated = 2 * allocated + 1; 232 numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg)); 233 } 234 numbered[numbered_arg_count].number = number; 235 numbered[numbered_arg_count].type = type; 236 numbered_arg_count++; 237 } 238 239 FDI_SET (format, FMTDIR_END); 240 241 format++; 242 } 243 244 /* Sort the numbered argument array, and eliminate duplicates. */ 245 if (numbered_arg_count > 1) 246 { 247 unsigned int i, j; 248 bool err; 249 250 qsort (numbered, numbered_arg_count, 251 sizeof (struct numbered_arg), numbered_arg_compare); 252 253 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ 254 err = false; 255 for (i = j = 0; i < numbered_arg_count; i++) 256 if (j > 0 && numbered[i].number == numbered[j-1].number) 257 { 258 enum format_arg_type type1 = numbered[i].type; 259 enum format_arg_type type2 = numbered[j-1].type; 260 enum format_arg_type type_both; 261 262 if (type1 == type2) 263 type_both = type1; 264 else 265 { 266 /* Incompatible types. */ 267 type_both = type1; 268 if (!err) 269 *invalid_reason = 270 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number); 271 err = true; 272 } 273 274 numbered[j-1].type = type_both; 275 } 276 else 277 { 278 if (j < i) 279 { 280 numbered[j].number = numbered[i].number; 281 numbered[j].type = numbered[i].type; 282 } 283 j++; 284 } 285 numbered_arg_count = j; 286 if (err) 287 /* *invalid_reason has already been set above. */ 288 goto bad_format; 289 } 290 291 result = XMALLOC (struct spec); 292 result->directives = directives; 293 result->numbered_arg_count = numbered_arg_count; 294 result->allocated = allocated; 295 result->numbered = numbered; 296 return result; 297 298 bad_format: 299 if (numbered != NULL) 300 free (numbered); 301 return NULL; 302} 303 304static void 305format_free (void *descr) 306{ 307 struct spec *spec = (struct spec *) descr; 308 309 if (spec->numbered != NULL) 310 free (spec->numbered); 311 free (spec); 312} 313 314static int 315format_get_number_of_directives (void *descr) 316{ 317 struct spec *spec = (struct spec *) descr; 318 319 return spec->directives; 320} 321 322static bool 323format_check (void *msgid_descr, void *msgstr_descr, bool equality, 324 formatstring_error_logger_t error_logger, 325 const char *pretty_msgstr) 326{ 327 struct spec *spec1 = (struct spec *) msgid_descr; 328 struct spec *spec2 = (struct spec *) msgstr_descr; 329 bool err = false; 330 331 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) 332 { 333 unsigned int i, j; 334 unsigned int n1 = spec1->numbered_arg_count; 335 unsigned int n2 = spec2->numbered_arg_count; 336 337 /* Check the argument names are the same. 338 Both arrays are sorted. We search for the first difference. */ 339 for (i = 0, j = 0; i < n1 || j < n2; ) 340 { 341 int cmp = (i >= n1 ? 1 : 342 j >= n2 ? -1 : 343 spec1->numbered[i].number > spec2->numbered[j].number ? 1 : 344 spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 345 0); 346 347 if (cmp > 0) 348 { 349 if (error_logger) 350 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"), 351 spec2->numbered[j].number, pretty_msgstr); 352 err = true; 353 break; 354 } 355 else if (cmp < 0) 356 { 357 if (equality) 358 { 359 if (error_logger) 360 error_logger (_("a format specification for argument %u doesn't exist in '%s'"), 361 spec1->numbered[i].number, pretty_msgstr); 362 err = true; 363 break; 364 } 365 else 366 i++; 367 } 368 else 369 j++, i++; 370 } 371 /* Check the argument types are the same. */ 372 if (!err) 373 for (i = 0, j = 0; j < n2; ) 374 { 375 if (spec1->numbered[i].number == spec2->numbered[j].number) 376 { 377 if (spec1->numbered[i].type != spec2->numbered[j].type) 378 { 379 if (error_logger) 380 error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"), 381 pretty_msgstr, spec2->numbered[j].number); 382 err = true; 383 break; 384 } 385 j++, i++; 386 } 387 else 388 i++; 389 } 390 } 391 392 return err; 393} 394 395 396struct formatstring_parser formatstring_php = 397{ 398 format_parse, 399 format_free, 400 format_get_number_of_directives, 401 NULL, 402 format_check 403}; 404 405 406#ifdef TEST 407 408/* Test program: Print the argument list specification returned by 409 format_parse for strings read from standard input. */ 410 411#include <stdio.h> 412 413static void 414format_print (void *descr) 415{ 416 struct spec *spec = (struct spec *) descr; 417 unsigned int last; 418 unsigned int i; 419 420 if (spec == NULL) 421 { 422 printf ("INVALID"); 423 return; 424 } 425 426 printf ("("); 427 last = 1; 428 for (i = 0; i < spec->numbered_arg_count; i++) 429 { 430 unsigned int number = spec->numbered[i].number; 431 432 if (i > 0) 433 printf (" "); 434 if (number < last) 435 abort (); 436 for (; last < number; last++) 437 printf ("_ "); 438 switch (spec->numbered[i].type) 439 { 440 case FAT_INTEGER: 441 printf ("i"); 442 break; 443 case FAT_FLOAT: 444 printf ("f"); 445 break; 446 case FAT_CHARACTER: 447 printf ("c"); 448 break; 449 case FAT_STRING: 450 printf ("s"); 451 break; 452 default: 453 abort (); 454 } 455 last = number + 1; 456 } 457 printf (")"); 458} 459 460int 461main () 462{ 463 for (;;) 464 { 465 char *line = NULL; 466 size_t line_size = 0; 467 int line_len; 468 char *invalid_reason; 469 void *descr; 470 471 line_len = getline (&line, &line_size, stdin); 472 if (line_len < 0) 473 break; 474 if (line_len > 0 && line[line_len - 1] == '\n') 475 line[--line_len] = '\0'; 476 477 invalid_reason = NULL; 478 descr = format_parse (line, false, NULL, &invalid_reason); 479 480 format_print (descr); 481 printf ("\n"); 482 if (descr == NULL) 483 printf ("%s\n", invalid_reason); 484 485 free (invalid_reason); 486 free (line); 487 } 488 489 return 0; 490} 491 492/* 493 * For Emacs M-x compile 494 * Local Variables: 495 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-php.c ../gnulib-lib/libgettextlib.la" 496 * End: 497 */ 498 499#endif /* TEST */ 500