/* awk format strings.
   Copyright (C) 2001-2004, 2006-2007 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2002.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <stdbool.h>
23#include <stdlib.h>
24
25#include "format.h"
26#include "c-ctype.h"
27#include "xalloc.h"
28#include "xvasprintf.h"
29#include "format-invalid.h"
30#include "gettext.h"
31
32#define _(str) gettext (str)
33
/* awk format strings are described in the gawk-3.1 documentation and
   implemented in gawk-3.1.0/builtin.c: format_tree().
   A directive
   - starts with '%' or '%m$' where m is a positive integer,
   - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
     each of which acts as a flag,
   - is optionally followed by a width specification: '*' (reads an argument)
     or '*m$' or a nonempty digit sequence,
   - is optionally followed by '.' and a precision specification: '*' (reads
     an argument) or '*m$' or a nonempty digit sequence,
   - is finished by a specifier
       - '%', that needs no argument,
       - 'c', that needs a character argument,
       - 's', that needs a string argument,
       - 'i', 'd', that need a signed integer argument,
       - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
       - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
   Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
   be used in the same string.
 */
54
/* Kind of argument that a directive (or a '*' width/precision) consumes.  */
enum format_arg_type
{
  FAT_NONE,             /* no argument ('%%'); also marks a type conflict */
  FAT_CHARACTER,        /* 'c' */
  FAT_STRING,           /* 's' */
  FAT_INTEGER,          /* 'i', 'd', and '*' width or precision */
  FAT_UNSIGNED_INTEGER, /* 'u', 'o', 'x', 'X' */
  FAT_FLOAT             /* 'e', 'E', 'f', 'g', 'G' */
};
64
/* One argument consumed by a format string.  */
struct numbered_arg
{
  unsigned int number;          /* 1-based position of the argument */
  enum format_arg_type type;    /* kind of value expected at that position */
};
70
/* Description of a parsed format string, as produced by format_parse.  */
struct spec
{
  unsigned int directives;          /* directives seen, '%%' included */
  unsigned int numbered_arg_count;  /* entries of 'numbered' in use */
  unsigned int allocated;           /* entries allocated for 'numbered' */
  struct numbered_arg *numbered;    /* argument list, or NULL if never
                                       allocated; format_parse returns it
                                       sorted by number, without duplicates */
};
78
/* Locale-independent decimal digit test.
   The argument may be a 'char' or an 'unsigned char' (the <ctype.h> isdigit,
   in contrast, requires an 'unsigned char' value): a character below '0'
   yields a negative difference, which the cast turns into a large unsigned
   number that fails the range check.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
84
85
86static int
87numbered_arg_compare (const void *p1, const void *p2)
88{
89  unsigned int n1 = ((const struct numbered_arg *) p1)->number;
90  unsigned int n2 = ((const struct numbered_arg *) p2)->number;
91
92  return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
93}
94
95static void *
96format_parse (const char *format, bool translated, char *fdi,
97	      char **invalid_reason)
98{
99  const char *const format_start = format;
100  struct spec spec;
101  unsigned int unnumbered_arg_count;
102  struct spec *result;
103
104  spec.directives = 0;
105  spec.numbered_arg_count = 0;
106  spec.allocated = 0;
107  spec.numbered = NULL;
108  unnumbered_arg_count = 0;
109
110  for (; *format != '\0';)
111    if (*format++ == '%')
112      {
113	/* A directive.  */
114	unsigned int number = 0;
115	enum format_arg_type type;
116
117	FDI_SET (format - 1, FMTDIR_START);
118	spec.directives++;
119
120	if (isdigit (*format))
121	  {
122	    const char *f = format;
123	    unsigned int m = 0;
124
125	    do
126	      {
127		m = 10 * m + (*f - '0');
128		f++;
129	      }
130	    while (isdigit (*f));
131
132	    if (*f == '$')
133	      {
134		if (m == 0)
135		  {
136		    *invalid_reason = INVALID_ARGNO_0 (spec.directives);
137		    FDI_SET (f, FMTDIR_ERROR);
138		    goto bad_format;
139		  }
140		number = m;
141		format = ++f;
142	      }
143	  }
144
145	/* Parse flags.  */
146	while (*format == ' ' || *format == '+' || *format == '-'
147	       || *format == '#' || *format == '0')
148	  format++;
149
150	/* Parse width.  */
151	if (*format == '*')
152	  {
153	    unsigned int width_number = 0;
154
155	    format++;
156
157	    if (isdigit (*format))
158	      {
159		const char *f = format;
160		unsigned int m = 0;
161
162		do
163		  {
164		    m = 10 * m + (*f - '0');
165		    f++;
166		  }
167		while (isdigit (*f));
168
169		if (*f == '$')
170		  {
171		    if (m == 0)
172		      {
173			*invalid_reason =
174			  INVALID_WIDTH_ARGNO_0 (spec.directives);
175			FDI_SET (f, FMTDIR_ERROR);
176			goto bad_format;
177		      }
178		    width_number = m;
179		    format = ++f;
180		  }
181	      }
182
183	    if (width_number)
184	      {
185		/* Numbered argument.  */
186
187		/* Numbered and unnumbered specifications are exclusive.  */
188		if (unnumbered_arg_count > 0)
189		  {
190		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
191		    FDI_SET (format - 1, FMTDIR_ERROR);
192		    goto bad_format;
193		  }
194
195		if (spec.allocated == spec.numbered_arg_count)
196		  {
197		    spec.allocated = 2 * spec.allocated + 1;
198		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
199		  }
200		spec.numbered[spec.numbered_arg_count].number = width_number;
201		spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
202		spec.numbered_arg_count++;
203	      }
204	    else
205	      {
206		/* Unnumbered argument.  */
207
208		/* Numbered and unnumbered specifications are exclusive.  */
209		if (spec.numbered_arg_count > 0)
210		  {
211		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
212		    FDI_SET (format - 1, FMTDIR_ERROR);
213		    goto bad_format;
214		  }
215
216		if (spec.allocated == unnumbered_arg_count)
217		  {
218		    spec.allocated = 2 * spec.allocated + 1;
219		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
220		  }
221		spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
222		spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
223		unnumbered_arg_count++;
224	      }
225	  }
226	else if (isdigit (*format))
227	  {
228	    do format++; while (isdigit (*format));
229	  }
230
231	/* Parse precision.  */
232	if (*format == '.')
233	  {
234	    format++;
235
236	    if (*format == '*')
237	      {
238		unsigned int precision_number = 0;
239
240		format++;
241
242		if (isdigit (*format))
243		  {
244		    const char *f = format;
245		    unsigned int m = 0;
246
247		    do
248		      {
249			m = 10 * m + (*f - '0');
250			f++;
251		      }
252		    while (isdigit (*f));
253
254		    if (*f == '$')
255		      {
256			if (m == 0)
257			  {
258			    *invalid_reason =
259			      INVALID_PRECISION_ARGNO_0 (spec.directives);
260			    FDI_SET (f, FMTDIR_ERROR);
261			    goto bad_format;
262			  }
263			precision_number = m;
264			format = ++f;
265		      }
266		  }
267
268		if (precision_number)
269		  {
270		    /* Numbered argument.  */
271
272		    /* Numbered and unnumbered specifications are exclusive.  */
273		    if (unnumbered_arg_count > 0)
274		      {
275			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
276			FDI_SET (format - 1, FMTDIR_ERROR);
277			goto bad_format;
278		      }
279
280		    if (spec.allocated == spec.numbered_arg_count)
281		      {
282			spec.allocated = 2 * spec.allocated + 1;
283			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
284		      }
285		    spec.numbered[spec.numbered_arg_count].number = precision_number;
286		    spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
287		    spec.numbered_arg_count++;
288		  }
289		else
290		  {
291		    /* Unnumbered argument.  */
292
293		    /* Numbered and unnumbered specifications are exclusive.  */
294		    if (spec.numbered_arg_count > 0)
295		      {
296			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
297			FDI_SET (format - 1, FMTDIR_ERROR);
298			goto bad_format;
299		      }
300
301		    if (spec.allocated == unnumbered_arg_count)
302		      {
303			spec.allocated = 2 * spec.allocated + 1;
304			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
305		      }
306		    spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
307		    spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
308		    unnumbered_arg_count++;
309		  }
310	      }
311	    else if (isdigit (*format))
312	      {
313		do format++; while (isdigit (*format));
314	      }
315	  }
316
317	switch (*format)
318	  {
319	  case '%':
320	    type = FAT_NONE;
321	    break;
322	  case 'c':
323	    type = FAT_CHARACTER;
324	    break;
325	  case 's':
326	    type = FAT_STRING;
327	    break;
328	  case 'i': case 'd':
329	    type = FAT_INTEGER;
330	    break;
331	  case 'u': case 'o': case 'x': case 'X':
332	    type = FAT_UNSIGNED_INTEGER;
333	    break;
334	  case 'e': case 'E': case 'f': case 'g': case 'G':
335	    type = FAT_FLOAT;
336	    break;
337	  default:
338	    if (*format == '\0')
339	      {
340		*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
341		FDI_SET (format - 1, FMTDIR_ERROR);
342	      }
343	    else
344	      {
345		*invalid_reason =
346		  INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
347		FDI_SET (format, FMTDIR_ERROR);
348	      }
349	    goto bad_format;
350	  }
351
352	if (type != FAT_NONE)
353	  {
354	    if (number)
355	      {
356		/* Numbered argument.  */
357
358		/* Numbered and unnumbered specifications are exclusive.  */
359		if (unnumbered_arg_count > 0)
360		  {
361		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
362		    FDI_SET (format, FMTDIR_ERROR);
363		    goto bad_format;
364		  }
365
366		if (spec.allocated == spec.numbered_arg_count)
367		  {
368		    spec.allocated = 2 * spec.allocated + 1;
369		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
370		  }
371		spec.numbered[spec.numbered_arg_count].number = number;
372		spec.numbered[spec.numbered_arg_count].type = type;
373		spec.numbered_arg_count++;
374	      }
375	    else
376	      {
377		/* Unnumbered argument.  */
378
379		/* Numbered and unnumbered specifications are exclusive.  */
380		if (spec.numbered_arg_count > 0)
381		  {
382		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
383		    FDI_SET (format, FMTDIR_ERROR);
384		    goto bad_format;
385		  }
386
387		if (spec.allocated == unnumbered_arg_count)
388		  {
389		    spec.allocated = 2 * spec.allocated + 1;
390		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
391		  }
392		spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
393		spec.numbered[unnumbered_arg_count].type = type;
394		unnumbered_arg_count++;
395	      }
396	  }
397
398	FDI_SET (format, FMTDIR_END);
399
400	format++;
401      }
402
403  /* Convert the unnumbered argument array to numbered arguments.  */
404  if (unnumbered_arg_count > 0)
405    spec.numbered_arg_count = unnumbered_arg_count;
406  /* Sort the numbered argument array, and eliminate duplicates.  */
407  else if (spec.numbered_arg_count > 1)
408    {
409      unsigned int i, j;
410      bool err;
411
412      qsort (spec.numbered, spec.numbered_arg_count,
413	     sizeof (struct numbered_arg), numbered_arg_compare);
414
415      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
416      err = false;
417      for (i = j = 0; i < spec.numbered_arg_count; i++)
418	if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
419	  {
420	    enum format_arg_type type1 = spec.numbered[i].type;
421	    enum format_arg_type type2 = spec.numbered[j-1].type;
422	    enum format_arg_type type_both;
423
424	    if (type1 == type2)
425	      type_both = type1;
426	    else
427	      {
428		/* Incompatible types.  */
429		type_both = FAT_NONE;
430		if (!err)
431		  *invalid_reason =
432		    INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
433		err = true;
434	      }
435
436	    spec.numbered[j-1].type = type_both;
437	  }
438	else
439	  {
440	    if (j < i)
441	      {
442		spec.numbered[j].number = spec.numbered[i].number;
443		spec.numbered[j].type = spec.numbered[i].type;
444	      }
445	    j++;
446	  }
447      spec.numbered_arg_count = j;
448      if (err)
449	/* *invalid_reason has already been set above.  */
450	goto bad_format;
451    }
452
453  result = XMALLOC (struct spec);
454  *result = spec;
455  return result;
456
457 bad_format:
458  if (spec.numbered != NULL)
459    free (spec.numbered);
460  return NULL;
461}
462
463static void
464format_free (void *descr)
465{
466  struct spec *spec = (struct spec *) descr;
467
468  if (spec->numbered != NULL)
469    free (spec->numbered);
470  free (spec);
471}
472
473static int
474format_get_number_of_directives (void *descr)
475{
476  struct spec *spec = (struct spec *) descr;
477
478  return spec->directives;
479}
480
481static bool
482format_check (void *msgid_descr, void *msgstr_descr, bool equality,
483	      formatstring_error_logger_t error_logger,
484	      const char *pretty_msgstr)
485{
486  struct spec *spec1 = (struct spec *) msgid_descr;
487  struct spec *spec2 = (struct spec *) msgstr_descr;
488  bool err = false;
489
490  if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
491    {
492      unsigned int i, j;
493      unsigned int n1 = spec1->numbered_arg_count;
494      unsigned int n2 = spec2->numbered_arg_count;
495
496      /* Check the argument names are the same.
497	 Both arrays are sorted.  We search for the first difference.  */
498      for (i = 0, j = 0; i < n1 || j < n2; )
499	{
500	  int cmp = (i >= n1 ? 1 :
501		     j >= n2 ? -1 :
502		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
503		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
504		     0);
505
506	  if (cmp > 0)
507	    {
508	      if (error_logger)
509		error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
510			      spec2->numbered[j].number, pretty_msgstr);
511	      err = true;
512	      break;
513	    }
514	  else if (cmp < 0)
515	    {
516	      if (equality)
517		{
518		  if (error_logger)
519		    error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
520				  spec1->numbered[i].number, pretty_msgstr);
521		  err = true;
522		  break;
523		}
524	      else
525		i++;
526	    }
527	  else
528	    j++, i++;
529	}
530      /* Check the argument types are the same.  */
531      if (!err)
532	for (i = 0, j = 0; j < n2; )
533	  {
534	    if (spec1->numbered[i].number == spec2->numbered[j].number)
535	      {
536		if (spec1->numbered[i].type != spec2->numbered[j].type)
537		  {
538		    if (error_logger)
539		      error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
540				    pretty_msgstr, spec2->numbered[j].number);
541		    err = true;
542		    break;
543		  }
544		j++, i++;
545	      }
546	    else
547	      i++;
548	  }
549    }
550
551  return err;
552}
553
554
/* Parser descriptor for awk format strings.  It exports the static
   functions of this file, in the member order of
   'struct formatstring_parser'.  The fourth member is left NULL; its
   meaning is declared in format.h, not here.  */
struct formatstring_parser formatstring_awk =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL,
  format_check
};
563
564
565#ifdef TEST
566
567/* Test program: Print the argument list specification returned by
568   format_parse for strings read from standard input.  */
569
570#include <stdio.h>
571
572static void
573format_print (void *descr)
574{
575  struct spec *spec = (struct spec *) descr;
576  unsigned int last;
577  unsigned int i;
578
579  if (spec == NULL)
580    {
581      printf ("INVALID");
582      return;
583    }
584
585  printf ("(");
586  last = 1;
587  for (i = 0; i < spec->numbered_arg_count; i++)
588    {
589      unsigned int number = spec->numbered[i].number;
590
591      if (i > 0)
592	printf (" ");
593      if (number < last)
594	abort ();
595      for (; last < number; last++)
596	printf ("_ ");
597      switch (spec->numbered[i].type)
598	{
599	case FAT_CHARACTER:
600	  printf ("c");
601	  break;
602	case FAT_STRING:
603	  printf ("s");
604	  break;
605	case FAT_INTEGER:
606	  printf ("i");
607	  break;
608	case FAT_UNSIGNED_INTEGER:
609	  printf ("[unsigned]i");
610	  break;
611	case FAT_FLOAT:
612	  printf ("f");
613	  break;
614	default:
615	  abort ();
616	}
617      last = number + 1;
618    }
619  printf (")");
620}
621
/* Test driver: reads format strings from standard input, one per line.
   For each line, prints the argument list found by format_parse; for an
   invalid format string, prints "INVALID" and, on the next line, the
   reason.  Returns 0 at end of input.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      ssize_t line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
653
654/*
655 * For Emacs M-x compile
656 * Local Variables:
657 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-awk.c ../gnulib-lib/libgettextlib.la"
658 * End:
659 */
660
661#endif /* TEST */
662