1/* PHP format strings.
2   Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2002.
4
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <stdbool.h>
23#include <stdlib.h>
24
25#include "format.h"
26#include "c-ctype.h"
27#include "xalloc.h"
28#include "xvasprintf.h"
29#include "format-invalid.h"
30#include "gettext.h"
31
32#define _(str) gettext (str)
33
34/* PHP format strings are described in phpdoc-4.0.6, file
35   phpdoc/manual/function.sprintf.html, and are implemented in
36   php-4.1.0/ext/standard/formatted_print.c.
37   A directive
38   - starts with '%' or '%m$' where m is a positive integer,
39   - is optionally followed by any of the characters '0', '-', ' ', or
40     "'<anychar>", each of which acts as a flag,
41   - is optionally followed by a width specification: a nonempty digit
42     sequence,
43   - is optionally followed by '.' and a precision specification: a nonempty
44     digit sequence,
45   - is optionally followed by a size specifier 'l', which is ignored,
46   - is finished by a specifier
47       - 's', that needs a string argument,
48       - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
49       - 'e', 'f', that need a floating-point argument,
50       - 'c', that needs a character argument.
51   Additionally there is the directive '%%', which takes no argument.
52   Numbered and unnumbered argument specifications can be used in the same
53   string.  Numbered argument specifications have no influence on the
54   "current argument index", that is incremented each time an argument is read.
55 */
56
/* Kind of argument a directive consumes.  format_parse derives it from the
   conversion specifier that ends the directive.  */
enum format_arg_type
{
  FAT_INTEGER,		/* 'b', 'd', 'u', 'o', 'x', 'X' */
  FAT_FLOAT,		/* 'e', 'f' */
  FAT_CHARACTER,	/* 'c' */
  FAT_STRING		/* 's' */
};
64
/* One argument referenced by a format string.  */
struct numbered_arg
{
  /* Argument position: the explicit m of '%m$', or the running count of
     unnumbered directives seen so far.  */
  unsigned int number;
  /* Type required by the directive that references this argument.  */
  enum format_arg_type type;
};
70
/* Result of parsing one format string, as built by format_parse.  */
struct spec
{
  /* Number of directives found; '%%' is counted too.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED (after duplicate removal).  */
  unsigned int numbered_arg_count;
  /* Number of entries for which NUMBERED has room.  */
  unsigned int allocated;
  /* Arguments sorted by ascending number, without duplicates; NULL when the
     format string consumes no argument.  */
  struct numbered_arg *numbered;
};
78
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)
   The subtraction result is converted to 'unsigned int', so characters
   below '0' become large values and fail the '< 10' test as well.
   Note that the argument is evaluated exactly once.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
84
85
86static int
87numbered_arg_compare (const void *p1, const void *p2)
88{
89  unsigned int n1 = ((const struct numbered_arg *) p1)->number;
90  unsigned int n2 = ((const struct numbered_arg *) p2)->number;
91
92  return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
93}
94
95static void *
96format_parse (const char *format, bool translated, char *fdi,
97	      char **invalid_reason)
98{
99  const char *const format_start = format;
100  unsigned int directives;
101  unsigned int numbered_arg_count;
102  unsigned int allocated;
103  struct numbered_arg *numbered;
104  unsigned int unnumbered_arg_count;
105  struct spec *result;
106
107  directives = 0;
108  numbered_arg_count = 0;
109  allocated = 0;
110  numbered = NULL;
111  unnumbered_arg_count = 0;
112
113  for (; *format != '\0';)
114    if (*format++ == '%')
115      {
116	/* A directive.  */
117	FDI_SET (format - 1, FMTDIR_START);
118	directives++;
119
120	if (*format != '%')
121	  {
122	    /* A complex directive.  */
123	    unsigned int number;
124	    enum format_arg_type type;
125
126	    number = ++unnumbered_arg_count;
127	    if (isdigit (*format))
128	      {
129		const char *f = format;
130		unsigned int m = 0;
131
132		do
133		  {
134		    m = 10 * m + (*f - '0');
135		    f++;
136		  }
137		while (isdigit (*f));
138
139		if (*f == '$')
140		  {
141		    if (m == 0)
142		      {
143			*invalid_reason = INVALID_ARGNO_0 (directives);
144			FDI_SET (f, FMTDIR_ERROR);
145			goto bad_format;
146		      }
147		    number = m;
148		    format = ++f;
149		    --unnumbered_arg_count;
150		  }
151	      }
152
153	    /* Parse flags.  */
154	    for (;;)
155	      {
156		if (*format == '0' || *format == '-' || *format == ' ')
157		  format++;
158		else if (*format == '\'')
159		  {
160		    format++;
161		    if (*format == '\0')
162		      {
163			*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
164			FDI_SET (format - 1, FMTDIR_ERROR);
165			goto bad_format;
166		      }
167		    format++;
168		  }
169		else
170		  break;
171	      }
172
173	    /* Parse width.  */
174	    if (isdigit (*format))
175	      {
176		do
177		  format++;
178		while (isdigit (*format));
179	      }
180
181	    /* Parse precision.  */
182	    if (*format == '.')
183	      {
184		format++;
185
186		if (isdigit (*format))
187		  {
188		    do
189		      format++;
190		    while (isdigit (*format));
191		  }
192		else
193		  --format;	/* will jump to bad_format */
194	      }
195
196	    /* Parse size.  */
197	    if (*format == 'l')
198	      format++;
199
200	    switch (*format)
201	      {
202	      case 'b': case 'd': case 'u': case 'o': case 'x': case 'X':
203		type = FAT_INTEGER;
204		break;
205	      case 'e': case 'f':
206		type = FAT_FLOAT;
207		break;
208	      case 'c':
209		type = FAT_CHARACTER;
210		break;
211	      case 's':
212		type = FAT_STRING;
213		break;
214	      default:
215		if (*format == '\0')
216		  {
217		    *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
218		    FDI_SET (format - 1, FMTDIR_ERROR);
219		  }
220		else
221		  {
222		    *invalid_reason =
223		      INVALID_CONVERSION_SPECIFIER (directives, *format);
224		    FDI_SET (format, FMTDIR_ERROR);
225		  }
226		goto bad_format;
227	      }
228
229	    if (allocated == numbered_arg_count)
230	      {
231		allocated = 2 * allocated + 1;
232		numbered = (struct numbered_arg *) xrealloc (numbered, allocated * sizeof (struct numbered_arg));
233	      }
234	    numbered[numbered_arg_count].number = number;
235	    numbered[numbered_arg_count].type = type;
236	    numbered_arg_count++;
237	  }
238
239	FDI_SET (format, FMTDIR_END);
240
241	format++;
242      }
243
244  /* Sort the numbered argument array, and eliminate duplicates.  */
245  if (numbered_arg_count > 1)
246    {
247      unsigned int i, j;
248      bool err;
249
250      qsort (numbered, numbered_arg_count,
251	     sizeof (struct numbered_arg), numbered_arg_compare);
252
253      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
254      err = false;
255      for (i = j = 0; i < numbered_arg_count; i++)
256	if (j > 0 && numbered[i].number == numbered[j-1].number)
257	  {
258	    enum format_arg_type type1 = numbered[i].type;
259	    enum format_arg_type type2 = numbered[j-1].type;
260	    enum format_arg_type type_both;
261
262	    if (type1 == type2)
263	      type_both = type1;
264	    else
265	      {
266		/* Incompatible types.  */
267		type_both = type1;
268		if (!err)
269		  *invalid_reason =
270		    INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
271		err = true;
272	      }
273
274	    numbered[j-1].type = type_both;
275	  }
276	else
277	  {
278	    if (j < i)
279	      {
280		numbered[j].number = numbered[i].number;
281		numbered[j].type = numbered[i].type;
282	      }
283	    j++;
284	  }
285      numbered_arg_count = j;
286      if (err)
287	/* *invalid_reason has already been set above.  */
288	goto bad_format;
289    }
290
291  result = XMALLOC (struct spec);
292  result->directives = directives;
293  result->numbered_arg_count = numbered_arg_count;
294  result->allocated = allocated;
295  result->numbered = numbered;
296  return result;
297
298 bad_format:
299  if (numbered != NULL)
300    free (numbered);
301  return NULL;
302}
303
304static void
305format_free (void *descr)
306{
307  struct spec *spec = (struct spec *) descr;
308
309  if (spec->numbered != NULL)
310    free (spec->numbered);
311  free (spec);
312}
313
314static int
315format_get_number_of_directives (void *descr)
316{
317  struct spec *spec = (struct spec *) descr;
318
319  return spec->directives;
320}
321
322static bool
323format_check (void *msgid_descr, void *msgstr_descr, bool equality,
324	      formatstring_error_logger_t error_logger,
325	      const char *pretty_msgstr)
326{
327  struct spec *spec1 = (struct spec *) msgid_descr;
328  struct spec *spec2 = (struct spec *) msgstr_descr;
329  bool err = false;
330
331  if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
332    {
333      unsigned int i, j;
334      unsigned int n1 = spec1->numbered_arg_count;
335      unsigned int n2 = spec2->numbered_arg_count;
336
337      /* Check the argument names are the same.
338	 Both arrays are sorted.  We search for the first difference.  */
339      for (i = 0, j = 0; i < n1 || j < n2; )
340	{
341	  int cmp = (i >= n1 ? 1 :
342		     j >= n2 ? -1 :
343		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
344		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
345		     0);
346
347	  if (cmp > 0)
348	    {
349	      if (error_logger)
350		error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
351			      spec2->numbered[j].number, pretty_msgstr);
352	      err = true;
353	      break;
354	    }
355	  else if (cmp < 0)
356	    {
357	      if (equality)
358		{
359		  if (error_logger)
360		    error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
361				  spec1->numbered[i].number, pretty_msgstr);
362		  err = true;
363		  break;
364		}
365	      else
366		i++;
367	    }
368	  else
369	    j++, i++;
370	}
371      /* Check the argument types are the same.  */
372      if (!err)
373	for (i = 0, j = 0; j < n2; )
374	  {
375	    if (spec1->numbered[i].number == spec2->numbered[j].number)
376	      {
377		if (spec1->numbered[i].type != spec2->numbered[j].type)
378		  {
379		    if (error_logger)
380		      error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
381				    pretty_msgstr, spec2->numbered[j].number);
382		    err = true;
383		    break;
384		  }
385		j++, i++;
386	      }
387	    else
388	      i++;
389	  }
390    }
391
392  return err;
393}
394
395
/* Parser descriptor for PHP format strings.  It bundles the callbacks
   defined in this file; the member order is dictated by
   'struct formatstring_parser' (presumably declared in "format.h", which is
   not visible here).  */
struct formatstring_parser formatstring_php =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL,				/* no callback provided for this slot */
  format_check
};
404
405
406#ifdef TEST
407
408/* Test program: Print the argument list specification returned by
409   format_parse for strings read from standard input.  */
410
411#include <stdio.h>
412
413static void
414format_print (void *descr)
415{
416  struct spec *spec = (struct spec *) descr;
417  unsigned int last;
418  unsigned int i;
419
420  if (spec == NULL)
421    {
422      printf ("INVALID");
423      return;
424    }
425
426  printf ("(");
427  last = 1;
428  for (i = 0; i < spec->numbered_arg_count; i++)
429    {
430      unsigned int number = spec->numbered[i].number;
431
432      if (i > 0)
433	printf (" ");
434      if (number < last)
435	abort ();
436      for (; last < number; last++)
437	printf ("_ ");
438      switch (spec->numbered[i].type)
439	{
440	case FAT_INTEGER:
441	  printf ("i");
442	  break;
443	case FAT_FLOAT:
444	  printf ("f");
445	  break;
446	case FAT_CHARACTER:
447	  printf ("c");
448	  break;
449	case FAT_STRING:
450	  printf ("s");
451	  break;
452	default:
453	  abort ();
454	}
455      last = number + 1;
456    }
457  printf (")");
458}
459
/* Test driver: reads format strings from standard input, one per line,
   and prints for each the argument list computed by format_parse, followed
   by the reason if the string is invalid.  Stops at end of input or on a
   read error.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
	{
	  /* getline may have allocated the buffer even though it
	     failed.  */
	  free (line);
	  break;
	}
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
	line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
	printf ("%s\n", invalid_reason);
      else
	/* Release the descriptor so that it does not leak on every
	   successfully parsed line.  */
	format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
491
492/*
493 * For Emacs M-x compile
494 * Local Variables:
495 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-php.c ../gnulib-lib/libgettextlib.la"
496 * End:
497 */
498
499#endif /* TEST */
500