1/* Boost format strings.
2   Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3   Written by Bruno Haible <haible@clisp.cons.org>, 2006.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23#include <stdbool.h>
24#include <stdlib.h>
25
26#include "format.h"
27#include "c-ctype.h"
28#include "xalloc.h"
29#include "xvasprintf.h"
30#include "format-invalid.h"
31#include "gettext.h"
32
33#define _(str) gettext (str)
34
35/* Boost format strings are described in
36     boost_1_33_1/libs/format/doc/format.html
37   and implemented in
38     boost_1_33_1/boost/format/parsing.hpp.
39   A directive (other than '%%')
40   - starts with '%' or '%|'; in the latter case it must end in '|',
41   - is continued either by
42       - 'm%' where m is a positive integer, starting with a nonzero digit;
43         in this case the directive must not have started with '%|'; or
44       - the following:
45           - optional: 'm$' where m is a positive integer, starting with a
46             nonzero digit,
47           - optional: any of the characters '#', '0', '-', ' ', '+', "'",
48             '_', '=', 'h', 'l',
49           - optional: a width specification: '*' (reads an argument) or '*m$'
50             or a nonempty digit sequence,
51           - optional: a '.' and a precision specification: '*' (reads an
52             argument) or '*m$' or a nonempty digit sequence,
53           - optional: any of the characters 'h', 'l', 'L',
54           - if the directive started with '%|':
55               an optional specifier and a final '|',
56             otherwise
57               a mandatory specifier.
58             If no specifier is given, it needs an argument of any type.
59             The possible specifiers are:
60               - 'c', 'C', that need a character argument,
61               - 's', 'S', that need an argument of any type,
62               - 'i', 'd', 'o', 'u', 'x', 'X', that need an integer argument,
63               - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument,
64               - 'p', that needs a 'void *' argument,
65               - 't', that doesn't need an argument,
66               - 'TX', where X is any character, that doesn't need an argument,
               - 'n', that doesn't need an argument (it is ignored),
68             The Boost format string interpreter doesn't actually care about
69             the argument types, but we do, because it increases the likelihood
70             of detecting translator mistakes.
71   Numbered ('%m%' or '%m$' or '*m$') and unnumbered argument specifications
72   cannot be used in the same string.
73 */
74
/* Classification of the argument consumed by a directive.  The parser only
   distinguishes the broad categories below.  */
enum format_arg_type
{
  FAT_NONE = 0,   /* 0: directive consumes no argument */
  /* Basic types */
  FAT_INTEGER,    /* 1: integer argument */
  FAT_DOUBLE,     /* 2: floating-point argument */
  FAT_CHAR,       /* 3: character argument */
  FAT_POINTER,    /* 4: 'void *' argument */
  FAT_ANY         /* 5: argument of any type */
};
85
86struct numbered_arg
87{
88  unsigned int number;
89  enum format_arg_type type;
90};
91
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, '%%' included.  */
  unsigned int directives;
  /* Number of valid entries in the 'numbered' array.  */
  unsigned int numbered_arg_count;
  /* Number of entries the 'numbered' array has room for.  */
  unsigned int allocated;
  /* Heap-allocated array of argument references, or NULL if none.  */
  struct numbered_arg *numbered;
};
99
/* Locale-independent test for a decimal digit.
   Unlike <ctype.h> isdigit, whose argument must be an 'unsigned char'
   value, this macro also accepts a plain 'char': the difference from '0'
   is converted to 'unsigned int', so anything below '0' turns into a large
   value and fails the range test.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
105
106
107static int
108numbered_arg_compare (const void *p1, const void *p2)
109{
110  unsigned int n1 = ((const struct numbered_arg *) p1)->number;
111  unsigned int n2 = ((const struct numbered_arg *) p2)->number;
112
113  return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
114}
115
116static void *
117format_parse (const char *format, bool translated, char **invalid_reason)
118{
119  struct spec spec;
120  unsigned int unnumbered_arg_count;
121  struct spec *result;
122
123  spec.directives = 0;
124  spec.numbered_arg_count = 0;
125  spec.allocated = 0;
126  spec.numbered = NULL;
127  unnumbered_arg_count = 0;
128
129  for (; *format != '\0';)
130    if (*format++ == '%')
131      {
132	/* A directive.  */
133	spec.directives++;
134
135	if (*format == '%')
136	  format++;
137	else
138	  {
139	    bool brackets = false;
140	    bool done = false;
141	    unsigned int number = 0;
142	    enum format_arg_type type = FAT_NONE;
143
144	    if (*format == '|')
145	      {
146		format++;
147		brackets = true;
148	      }
149
150	    if (isdigit (*format) && *format != '0')
151	      {
152		const char *f = format;
153		unsigned int m = 0;
154
155		do
156		  {
157		    m = 10 * m + (*f - '0');
158		    f++;
159		  }
160		while (isdigit (*f));
161
162		if ((!brackets && *f == '%') || *f == '$')
163		  {
164		    if (m == 0) /* can happen if m overflows */
165		      {
166			*invalid_reason = INVALID_ARGNO_0 (spec.directives);
167			goto bad_format;
168		      }
169		    number = m;
170		    if (*f == '%')
171		      {
172			type = FAT_ANY;
173			done = true;
174		      }
175		    format = ++f;
176		  }
177	      }
178
179	    if (!done)
180	      {
181		/* Parse flags.  */
182		for (;;)
183		  {
184		    if (*format == ' ' || *format == '+' || *format == '-'
185			|| *format == '#' || *format == '0' || *format == '\''
186			|| *format == '_' || *format == '=' || *format == 'h'
187			|| *format == 'l')
188		      format++;
189		    else
190		      break;
191		  }
192
193		/* Parse width.  */
194		if (*format == '*')
195		  {
196		    unsigned int width_number = 0;
197
198		    format++;
199
200		    if (isdigit (*format))
201		      {
202			const char *f = format;
203			unsigned int m = 0;
204
205			do
206			  {
207			    m = 10 * m + (*f - '0');
208			    f++;
209			  }
210			while (isdigit (*f));
211
212			if (*f == '$')
213			  {
214			    if (m == 0)
215			      {
216				*invalid_reason =
217				  INVALID_WIDTH_ARGNO_0 (spec.directives);
218				goto bad_format;
219			      }
220			    width_number = m;
221			    format = ++f;
222			  }
223		      }
224
225		    if (width_number)
226		      {
227			/* Numbered argument.  */
228
229			/* Numbered and unnumbered specifications are
230			   exclusive.  */
231			if (unnumbered_arg_count > 0)
232			  {
233			    *invalid_reason =
234			      INVALID_MIXES_NUMBERED_UNNUMBERED ();
235			    goto bad_format;
236			  }
237
238			if (spec.allocated == spec.numbered_arg_count)
239			  {
240			    spec.allocated = 2 * spec.allocated + 1;
241			    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
242			  }
243			spec.numbered[spec.numbered_arg_count].number = width_number;
244			spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
245			spec.numbered_arg_count++;
246		      }
247		    else
248		      {
249			/* Unnumbered argument.  */
250
251			/* Numbered and unnumbered specifications are
252			   exclusive.  */
253			if (spec.numbered_arg_count > 0)
254			  {
255			    *invalid_reason =
256			      INVALID_MIXES_NUMBERED_UNNUMBERED ();
257			    goto bad_format;
258			  }
259
260			if (spec.allocated == unnumbered_arg_count)
261			  {
262			    spec.allocated = 2 * spec.allocated + 1;
263			    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
264			  }
265			spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
266			spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
267			unnumbered_arg_count++;
268		      }
269		  }
270		else if (isdigit (*format))
271		  {
272		    do format++; while (isdigit (*format));
273		  }
274
275		/* Parse precision.  */
276		if (*format == '.')
277		  {
278		    format++;
279
280		    if (*format == '*')
281		      {
282			unsigned int precision_number = 0;
283
284			format++;
285
286			if (isdigit (*format))
287			  {
288			    const char *f = format;
289			    unsigned int m = 0;
290
291			    do
292			      {
293				m = 10 * m + (*f - '0');
294				f++;
295			      }
296			    while (isdigit (*f));
297
298			    if (*f == '$')
299			      {
300				if (m == 0)
301				  {
302				    *invalid_reason =
303				      INVALID_PRECISION_ARGNO_0 (spec.directives);
304				    goto bad_format;
305				  }
306				precision_number = m;
307				format = ++f;
308			      }
309			  }
310
311			if (precision_number)
312			  {
313			    /* Numbered argument.  */
314
315			    /* Numbered and unnumbered specifications are
316			       exclusive.  */
317			    if (unnumbered_arg_count > 0)
318			      {
319				*invalid_reason =
320				  INVALID_MIXES_NUMBERED_UNNUMBERED ();
321				goto bad_format;
322			      }
323
324			    if (spec.allocated == spec.numbered_arg_count)
325			      {
326				spec.allocated = 2 * spec.allocated + 1;
327				spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
328			      }
329			    spec.numbered[spec.numbered_arg_count].number = precision_number;
330			    spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
331			    spec.numbered_arg_count++;
332			  }
333			else
334			  {
335			    /* Unnumbered argument.  */
336
337			    /* Numbered and unnumbered specifications are
338			       exclusive.  */
339			    if (spec.numbered_arg_count > 0)
340			      {
341				*invalid_reason =
342				  INVALID_MIXES_NUMBERED_UNNUMBERED ();
343				goto bad_format;
344			      }
345
346			    if (spec.allocated == unnumbered_arg_count)
347			      {
348				spec.allocated = 2 * spec.allocated + 1;
349				spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated  * sizeof (struct numbered_arg));
350			      }
351			    spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
352			    spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
353			    unnumbered_arg_count++;
354			  }
355		      }
356		    else if (isdigit (*format))
357		      {
358			do format++; while (isdigit (*format));
359		      }
360		  }
361
362		/* Parse size.  */
363		for (;;)
364		  {
365		    if (*format == 'h' || *format == 'l' || *format == 'L')
366		      format++;
367		    else
368		      break;
369		  }
370
371		switch (*format++)
372		  {
373		  case 'c': case 'C':
374		    type = FAT_CHAR;
375		    break;
376		  case 's': case 'S':
377		    type = FAT_ANY;
378		    break;
379		  case 'i': case 'd': case 'o': case 'u': case 'x': case 'X':
380		    type = FAT_INTEGER;
381		    break;
382		  case 'e': case 'E': case 'f': case 'g': case 'G':
383		    type = FAT_DOUBLE;
384		    break;
385		  case 'p':
386		    type = FAT_POINTER;
387		    break;
388		  case 't':
389		    type = FAT_NONE;
390		    break;
391		  case 'T':
392		    if (*format == '\0')
393		      {
394			*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
395			goto bad_format;
396		      }
397		    format++;
398		    type = FAT_NONE;
399		    break;
400		  case 'n':
401		    type = FAT_NONE;
402		    break;
403		  case '|':
404		    if (brackets)
405		      {
406			--format;
407			type = FAT_ANY;
408			break;
409		      }
410		    /*FALLTHROUGH*/
411		  default:
412		    --format;
413		    *invalid_reason =
414		      (*format == '\0'
415		       ? INVALID_UNTERMINATED_DIRECTIVE ()
416		       : INVALID_CONVERSION_SPECIFIER (spec.directives,
417						       *format));
418		    goto bad_format;
419		  }
420		if (brackets)
421		  {
422		    if (*format != '|')
423		      {
424			*invalid_reason =
425			  (*format == '\0'
426			   ? INVALID_UNTERMINATED_DIRECTIVE ()
427			   : xasprintf (_("The directive number %u starts with | but does not end with |."),
428					spec.directives));
429			goto bad_format;
430		      }
431		    format++;
432		  }
433	      }
434
435	    if (type != FAT_NONE)
436	      {
437		if (number)
438		  {
439		    /* Numbered argument.  */
440
441		    /* Numbered and unnumbered specifications are exclusive.  */
442		    if (unnumbered_arg_count > 0)
443		      {
444			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
445			goto bad_format;
446		      }
447
448		    if (spec.allocated == spec.numbered_arg_count)
449		      {
450			spec.allocated = 2 * spec.allocated + 1;
451			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
452		      }
453		    spec.numbered[spec.numbered_arg_count].number = number;
454		    spec.numbered[spec.numbered_arg_count].type = type;
455		    spec.numbered_arg_count++;
456		  }
457		else
458		  {
459		    /* Unnumbered argument.  */
460
461		    /* Numbered and unnumbered specifications are exclusive.  */
462		    if (spec.numbered_arg_count > 0)
463		      {
464			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
465			goto bad_format;
466		      }
467
468		    if (spec.allocated == unnumbered_arg_count)
469		      {
470			spec.allocated = 2 * spec.allocated + 1;
471			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
472		      }
473		    spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
474		    spec.numbered[unnumbered_arg_count].type = type;
475		    unnumbered_arg_count++;
476		  }
477	      }
478	  }
479      }
480
481  /* Convert the unnumbered argument array to numbered arguments.  */
482  if (unnumbered_arg_count > 0)
483    spec.numbered_arg_count = unnumbered_arg_count;
484  /* Sort the numbered argument array, and eliminate duplicates.  */
485  else if (spec.numbered_arg_count > 1)
486    {
487      unsigned int i, j;
488      bool err;
489
490      qsort (spec.numbered, spec.numbered_arg_count,
491	     sizeof (struct numbered_arg), numbered_arg_compare);
492
493      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
494      err = false;
495      for (i = j = 0; i < spec.numbered_arg_count; i++)
496	if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
497	  {
498	    enum format_arg_type type1 = spec.numbered[i].type;
499	    enum format_arg_type type2 = spec.numbered[j-1].type;
500	    enum format_arg_type type_both;
501
502	    if (type1 == type2 || type2 == FAT_ANY)
503	      type_both = type1;
504	    else if (type1 == FAT_ANY)
505	      type_both = type2;
506	    else
507	      {
508		/* Incompatible types.  */
509		type_both = FAT_NONE;
510		if (!err)
511		  *invalid_reason =
512		    INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
513		err = true;
514	      }
515
516	    spec.numbered[j-1].type = type_both;
517	  }
518	else
519	  {
520	    if (j < i)
521	      {
522		spec.numbered[j].number = spec.numbered[i].number;
523		spec.numbered[j].type = spec.numbered[i].type;
524	      }
525	    j++;
526	  }
527      spec.numbered_arg_count = j;
528      if (err)
529	/* *invalid_reason has already been set above.  */
530	goto bad_format;
531    }
532
533  result = (struct spec *) xmalloc (sizeof (struct spec));
534  *result = spec;
535  return result;
536
537 bad_format:
538  if (spec.numbered != NULL)
539    free (spec.numbered);
540  return NULL;
541}
542
543static void
544format_free (void *descr)
545{
546  struct spec *spec = (struct spec *) descr;
547
548  if (spec->numbered != NULL)
549    free (spec->numbered);
550  free (spec);
551}
552
553static int
554format_get_number_of_directives (void *descr)
555{
556  struct spec *spec = (struct spec *) descr;
557
558  return spec->directives;
559}
560
561static bool
562format_check (void *msgid_descr, void *msgstr_descr, bool equality,
563	      formatstring_error_logger_t error_logger,
564	      const char *pretty_msgstr)
565{
566  struct spec *spec1 = (struct spec *) msgid_descr;
567  struct spec *spec2 = (struct spec *) msgstr_descr;
568  bool err = false;
569
570  if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
571    {
572      unsigned int i, j;
573      unsigned int n1 = spec1->numbered_arg_count;
574      unsigned int n2 = spec2->numbered_arg_count;
575
576      /* Check the argument names are the same.
577	 Both arrays are sorted.  We search for the first difference.  */
578      for (i = 0, j = 0; i < n1 || j < n2; )
579	{
580	  int cmp = (i >= n1 ? 1 :
581		     j >= n2 ? -1 :
582		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
583		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
584		     0);
585
586	  if (cmp > 0)
587	    {
588	      if (error_logger)
589		error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
590			      spec2->numbered[j].number, pretty_msgstr);
591	      err = true;
592	      break;
593	    }
594	  else if (cmp < 0)
595	    {
596	      if (equality)
597		{
598		  if (error_logger)
599		    error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
600				  spec1->numbered[i].number, pretty_msgstr);
601		  err = true;
602		  break;
603		}
604	      else
605		i++;
606	    }
607	  else
608	    j++, i++;
609	}
610      /* Check the argument types are the same.  */
611      if (!err)
612	for (i = 0, j = 0; j < n2; )
613	  {
614	    if (spec1->numbered[i].number == spec2->numbered[j].number)
615	      {
616		if (spec1->numbered[i].type != spec2->numbered[j].type)
617		  {
618		    if (error_logger)
619		      error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
620				    pretty_msgstr, spec2->numbered[j].number);
621		    err = true;
622		    break;
623		  }
624		j++, i++;
625	      }
626	    else
627	      i++;
628	  }
629    }
630
631  return err;
632}
633
634
635struct formatstring_parser formatstring_boost =
636{
637  format_parse,
638  format_free,
639  format_get_number_of_directives,
640  NULL,
641  format_check
642};
643
644
645#ifdef TEST
646
647/* Test program: Print the argument list specification returned by
648   format_parse for strings read from standard input.  */
649
650#include <stdio.h>
651#include "getline.h"
652
653static void
654format_print (void *descr)
655{
656  struct spec *spec = (struct spec *) descr;
657  unsigned int last;
658  unsigned int i;
659
660  if (spec == NULL)
661    {
662      printf ("INVALID");
663      return;
664    }
665
666  printf ("(");
667  last = 1;
668  for (i = 0; i < spec->numbered_arg_count; i++)
669    {
670      unsigned int number = spec->numbered[i].number;
671
672      if (i > 0)
673	printf (" ");
674      if (number < last)
675	abort ();
676      for (; last < number; last++)
677	printf ("_ ");
678      switch (spec->numbered[i].type)
679	{
680	case FAT_INTEGER:
681	  printf ("i");
682	  break;
683	case FAT_DOUBLE:
684	  printf ("f");
685	  break;
686	case FAT_CHAR:
687	  printf ("c");
688	  break;
689	case FAT_POINTER:
690	  printf ("p");
691	  break;
692	case FAT_ANY:
693	  printf ("*");
694	  break;
695	default:
696	  abort ();
697	}
698      last = number + 1;
699    }
700  printf (")");
701}
702
/* Test driver: reads format strings from standard input, one per line,
   and prints for each the argument list computed by format_parse, followed
   by the error message if the string is invalid.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the parse result; it is not needed past this point.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
734
735/*
736 * For Emacs M-x compile
737 * Local Variables:
738 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-boost.c ../lib/libgettextlib.la"
739 * End:
740 */
741
742#endif /* TEST */
743
744