1/* awk format strings.
2   Copyright (C) 2001-2004 Free Software Foundation, Inc.
3   Written by Bruno Haible <haible@clisp.cons.org>, 2002.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23#include <stdbool.h>
24#include <stdlib.h>
25
26#include "format.h"
27#include "c-ctype.h"
28#include "xalloc.h"
29#include "xerror.h"
30#include "format-invalid.h"
31#include "gettext.h"
32
33#define _(str) gettext (str)
34
/* awk format strings are described in the gawk-3.1 documentation and
   implemented in gawk-3.1.0/builtin.c: format_tree().
   A directive
   - starts with '%' or '%m$' where m is a positive integer,
   - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
     each of which acts as a flag,
   - is optionally followed by a width specification: '*' (reads an argument)
     or '*m$' or a nonempty digit sequence,
   - is optionally followed by '.' and a precision specification: '*' (reads
     an argument) or '*m$' or a nonempty digit sequence,
   - is finished by a specifier
       - '%', that needs no argument,
       - 'c', that needs a character argument,
       - 's', that needs a string argument,
       - 'i', 'd', that need a signed integer argument,
       - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
       - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
   Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
   be used in the same string.
 */
55
/* Classification of the argument consumed by a directive.  FAT_NONE is
   used for '%%' (no argument) and also marks a numbered argument whose
   uses turned out to have incompatible types.  */
enum format_arg_type
{
  FAT_NONE = 0,			/* no argument */
  FAT_CHARACTER,		/* 'c' */
  FAT_STRING,			/* 's' */
  FAT_INTEGER,			/* 'i', 'd', and '*' width/precision */
  FAT_UNSIGNED_INTEGER,		/* 'o', 'u', 'x', 'X' */
  FAT_FLOAT			/* 'e', 'E', 'f', 'g', 'G' */
};
65
/* One argument referenced by a format string.  */
struct numbered_arg
{
  /* 1-based position of the argument; the parser rejects "0$" and numbers
     unnumbered arguments consecutively starting at 1.  */
  unsigned int number;
  /* Kind of value the directive expects at that position.  */
  enum format_arg_type type;
};
71
/* Result of parsing one format string.  */
struct spec
{
  /* Number of '%' directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in 'numbered'.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which 'numbered' has room.  */
  unsigned int allocated;
  /* Heap-allocated argument array (NULL if the string takes no arguments);
     after a successful parse it is ordered by increasing 'number' without
     duplicates.  */
  struct numbered_arg *numbered;
};
79
/* Decimal digit test that does not depend on the locale.
   Unlike <ctype.h> isdigit, whose argument must be representable as an
   'unsigned char', this accepts a plain 'char' (possibly negative) as well
   as an 'unsigned char': values below '0' wrap around to a large unsigned
   number and therefore fail the range check.  The argument is evaluated
   exactly once.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9u)
85
86
87static int
88numbered_arg_compare (const void *p1, const void *p2)
89{
90  unsigned int n1 = ((const struct numbered_arg *) p1)->number;
91  unsigned int n2 = ((const struct numbered_arg *) p2)->number;
92
93  return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
94}
95
96static void *
97format_parse (const char *format, bool translated, char **invalid_reason)
98{
99  struct spec spec;
100  unsigned int unnumbered_arg_count;
101  struct spec *result;
102
103  spec.directives = 0;
104  spec.numbered_arg_count = 0;
105  spec.allocated = 0;
106  spec.numbered = NULL;
107  unnumbered_arg_count = 0;
108
109  for (; *format != '\0';)
110    if (*format++ == '%')
111      {
112	/* A directive.  */
113	unsigned int number = 0;
114	enum format_arg_type type;
115
116	spec.directives++;
117
118	if (isdigit (*format))
119	  {
120	    const char *f = format;
121	    unsigned int m = 0;
122
123	    do
124	      {
125		m = 10 * m + (*f - '0');
126		f++;
127	      }
128	    while (isdigit (*f));
129
130	    if (*f == '$')
131	      {
132		if (m == 0)
133		  {
134		    *invalid_reason = INVALID_ARGNO_0 (spec.directives);
135		    goto bad_format;
136		  }
137		number = m;
138		format = ++f;
139	      }
140	  }
141
142	/* Parse flags.  */
143	while (*format == ' ' || *format == '+' || *format == '-'
144	       || *format == '#' || *format == '0')
145	  format++;
146
147	/* Parse width.  */
148	if (*format == '*')
149	  {
150	    unsigned int width_number = 0;
151
152	    format++;
153
154	    if (isdigit (*format))
155	      {
156		const char *f = format;
157		unsigned int m = 0;
158
159		do
160		  {
161		    m = 10 * m + (*f - '0');
162		    f++;
163		  }
164		while (isdigit (*f));
165
166		if (*f == '$')
167		  {
168		    if (m == 0)
169		      {
170			*invalid_reason =
171			  INVALID_WIDTH_ARGNO_0 (spec.directives);
172			goto bad_format;
173		      }
174		    width_number = m;
175		    format = ++f;
176		  }
177	      }
178
179	    if (width_number)
180	      {
181		/* Numbered argument.  */
182
183		/* Numbered and unnumbered specifications are exclusive.  */
184		if (unnumbered_arg_count > 0)
185		  {
186		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
187		    goto bad_format;
188		  }
189
190		if (spec.allocated == spec.numbered_arg_count)
191		  {
192		    spec.allocated = 2 * spec.allocated + 1;
193		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
194		  }
195		spec.numbered[spec.numbered_arg_count].number = width_number;
196		spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
197		spec.numbered_arg_count++;
198	      }
199	    else
200	      {
201		/* Unnumbered argument.  */
202
203		/* Numbered and unnumbered specifications are exclusive.  */
204		if (spec.numbered_arg_count > 0)
205		  {
206		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
207		    goto bad_format;
208		  }
209
210		if (spec.allocated == unnumbered_arg_count)
211		  {
212		    spec.allocated = 2 * spec.allocated + 1;
213		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
214		  }
215		spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
216		spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
217		unnumbered_arg_count++;
218	      }
219	  }
220	else if (isdigit (*format))
221	  {
222	    do format++; while (isdigit (*format));
223	  }
224
225	/* Parse precision.  */
226	if (*format == '.')
227	  {
228	    format++;
229
230	    if (*format == '*')
231	      {
232		unsigned int precision_number = 0;
233
234		format++;
235
236		if (isdigit (*format))
237		  {
238		    const char *f = format;
239		    unsigned int m = 0;
240
241		    do
242		      {
243			m = 10 * m + (*f - '0');
244			f++;
245		      }
246		    while (isdigit (*f));
247
248		    if (*f == '$')
249		      {
250			if (m == 0)
251			  {
252			    *invalid_reason =
253			      INVALID_PRECISION_ARGNO_0 (spec.directives);
254			    goto bad_format;
255			  }
256			precision_number = m;
257			format = ++f;
258		      }
259		  }
260
261		if (precision_number)
262		  {
263		    /* Numbered argument.  */
264
265		    /* Numbered and unnumbered specifications are exclusive.  */
266		    if (unnumbered_arg_count > 0)
267		      {
268			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
269			goto bad_format;
270		      }
271
272		    if (spec.allocated == spec.numbered_arg_count)
273		      {
274			spec.allocated = 2 * spec.allocated + 1;
275			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
276		      }
277		    spec.numbered[spec.numbered_arg_count].number = precision_number;
278		    spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
279		    spec.numbered_arg_count++;
280		  }
281		else
282		  {
283		    /* Unnumbered argument.  */
284
285		    /* Numbered and unnumbered specifications are exclusive.  */
286		    if (spec.numbered_arg_count > 0)
287		      {
288			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
289			goto bad_format;
290		      }
291
292		    if (spec.allocated == unnumbered_arg_count)
293		      {
294			spec.allocated = 2 * spec.allocated + 1;
295			spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
296		      }
297		    spec.numbered[unnumbered_arg_count].type = unnumbered_arg_count + 1;
298		    spec.numbered[unnumbered_arg_count].type = FAT_INTEGER;
299		    unnumbered_arg_count++;
300		  }
301	      }
302	    else if (isdigit (*format))
303	      {
304		do format++; while (isdigit (*format));
305	      }
306	  }
307
308	switch (*format)
309	  {
310	  case '%':
311	    type = FAT_NONE;
312	    break;
313	  case 'c':
314	    type = FAT_CHARACTER;
315	    break;
316	  case 's':
317	    type = FAT_STRING;
318	    break;
319	  case 'i': case 'd':
320	    type = FAT_INTEGER;
321	    break;
322	  case 'u': case 'o': case 'x': case 'X':
323	    type = FAT_UNSIGNED_INTEGER;
324	    break;
325	  case 'e': case 'E': case 'f': case 'g': case 'G':
326	    type = FAT_FLOAT;
327	    break;
328	  default:
329	    *invalid_reason =
330	      (*format == '\0'
331	       ? INVALID_UNTERMINATED_DIRECTIVE ()
332	       : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
333	    goto bad_format;
334	  }
335
336	if (type != FAT_NONE)
337	  {
338	    if (number)
339	      {
340		/* Numbered argument.  */
341
342		/* Numbered and unnumbered specifications are exclusive.  */
343		if (unnumbered_arg_count > 0)
344		  {
345		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
346		    goto bad_format;
347		  }
348
349		if (spec.allocated == spec.numbered_arg_count)
350		  {
351		    spec.allocated = 2 * spec.allocated + 1;
352		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
353		  }
354		spec.numbered[spec.numbered_arg_count].number = number;
355		spec.numbered[spec.numbered_arg_count].type = type;
356		spec.numbered_arg_count++;
357	      }
358	    else
359	      {
360		/* Unnumbered argument.  */
361
362		/* Numbered and unnumbered specifications are exclusive.  */
363		if (spec.numbered_arg_count > 0)
364		  {
365		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
366		    goto bad_format;
367		  }
368
369		if (spec.allocated == unnumbered_arg_count)
370		  {
371		    spec.allocated = 2 * spec.allocated + 1;
372		    spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
373		  }
374		spec.numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
375		spec.numbered[unnumbered_arg_count].type = type;
376		unnumbered_arg_count++;
377	      }
378	  }
379
380	format++;
381      }
382
383  /* Convert the unnumbered argument array to numbered arguments.  */
384  if (unnumbered_arg_count > 0)
385    spec.numbered_arg_count = unnumbered_arg_count;
386  /* Sort the numbered argument array, and eliminate duplicates.  */
387  else if (spec.numbered_arg_count > 1)
388    {
389      unsigned int i, j;
390      bool err;
391
392      qsort (spec.numbered, spec.numbered_arg_count,
393	     sizeof (struct numbered_arg), numbered_arg_compare);
394
395      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
396      err = false;
397      for (i = j = 0; i < spec.numbered_arg_count; i++)
398	if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
399	  {
400	    enum format_arg_type type1 = spec.numbered[i].type;
401	    enum format_arg_type type2 = spec.numbered[j-1].type;
402	    enum format_arg_type type_both;
403
404	    if (type1 == type2)
405	      type_both = type1;
406	    else
407	      {
408		/* Incompatible types.  */
409		type_both = FAT_NONE;
410		if (!err)
411		  *invalid_reason =
412		    INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
413		err = true;
414	      }
415
416	    spec.numbered[j-1].type = type_both;
417	  }
418	else
419	  {
420	    if (j < i)
421	      {
422		spec.numbered[j].number = spec.numbered[i].number;
423		spec.numbered[j].type = spec.numbered[i].type;
424	      }
425	    j++;
426	  }
427      spec.numbered_arg_count = j;
428      if (err)
429	/* *invalid_reason has already been set above.  */
430	goto bad_format;
431    }
432
433  result = (struct spec *) xmalloc (sizeof (struct spec));
434  *result = spec;
435  return result;
436
437 bad_format:
438  if (spec.numbered != NULL)
439    free (spec.numbered);
440  return NULL;
441}
442
443static void
444format_free (void *descr)
445{
446  struct spec *spec = (struct spec *) descr;
447
448  if (spec->numbered != NULL)
449    free (spec->numbered);
450  free (spec);
451}
452
453static int
454format_get_number_of_directives (void *descr)
455{
456  struct spec *spec = (struct spec *) descr;
457
458  return spec->directives;
459}
460
461static bool
462format_check (void *msgid_descr, void *msgstr_descr, bool equality,
463	      formatstring_error_logger_t error_logger,
464	      const char *pretty_msgstr)
465{
466  struct spec *spec1 = (struct spec *) msgid_descr;
467  struct spec *spec2 = (struct spec *) msgstr_descr;
468  bool err = false;
469
470  if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
471    {
472      unsigned int i, j;
473      unsigned int n1 = spec1->numbered_arg_count;
474      unsigned int n2 = spec2->numbered_arg_count;
475
476      /* Check the argument names are the same.
477	 Both arrays are sorted.  We search for the first difference.  */
478      for (i = 0, j = 0; i < n1 || j < n2; )
479	{
480	  int cmp = (i >= n1 ? 1 :
481		     j >= n2 ? -1 :
482		     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
483		     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
484		     0);
485
486	  if (cmp > 0)
487	    {
488	      if (error_logger)
489		error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in 'msgid'"),
490			      spec2->numbered[j].number, pretty_msgstr);
491	      err = true;
492	      break;
493	    }
494	  else if (cmp < 0)
495	    {
496	      if (equality)
497		{
498		  if (error_logger)
499		    error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
500				  spec1->numbered[i].number, pretty_msgstr);
501		  err = true;
502		  break;
503		}
504	      else
505		i++;
506	    }
507	  else
508	    j++, i++;
509	}
510      /* Check the argument types are the same.  */
511      if (!err)
512	for (i = 0, j = 0; j < n2; )
513	  {
514	    if (spec1->numbered[i].number == spec2->numbered[j].number)
515	      {
516		if (spec1->numbered[i].type != spec2->numbered[j].type)
517		  {
518		    if (error_logger)
519		      error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
520				    pretty_msgstr, spec2->numbered[j].number);
521		    err = true;
522		    break;
523		  }
524		j++, i++;
525	      }
526	    else
527	      i++;
528	  }
529    }
530
531  return err;
532}
533
534
/* Table of the awk format string handlers defined in this file.
   NOTE(review): 'struct formatstring_parser' is not declared here
   (presumably in "format.h"); the initializers are positional, so their
   order must match the member order of that declaration -- confirm before
   reordering.  */
struct formatstring_parser formatstring_awk =
{
  format_parse,				/* parse a format string */
  format_free,				/* release a parse result */
  format_get_number_of_directives,	/* count of directives */
  format_check				/* compare msgid and msgstr */
};
542
543
544#ifdef TEST
545
546/* Test program: Print the argument list specification returned by
547   format_parse for strings read from standard input.  */
548
549#include <stdio.h>
550#include "getline.h"
551
552static void
553format_print (void *descr)
554{
555  struct spec *spec = (struct spec *) descr;
556  unsigned int last;
557  unsigned int i;
558
559  if (spec == NULL)
560    {
561      printf ("INVALID");
562      return;
563    }
564
565  printf ("(");
566  last = 1;
567  for (i = 0; i < spec->numbered_arg_count; i++)
568    {
569      unsigned int number = spec->numbered[i].number;
570
571      if (i > 0)
572	printf (" ");
573      if (number < last)
574	abort ();
575      for (; last < number; last++)
576	printf ("_ ");
577      switch (spec->numbered[i].type)
578	{
579	case FAT_CHARACTER:
580	  printf ("c");
581	  break;
582	case FAT_STRING:
583	  printf ("s");
584	  break;
585	case FAT_INTEGER:
586	  printf ("i");
587	  break;
588	case FAT_UNSIGNED_INTEGER:
589	  printf ("[unsigned]i");
590	  break;
591	case FAT_FLOAT:
592	  printf ("f");
593	  break;
594	default:
595	  abort ();
596	}
597      last = number + 1;
598    }
599  printf (")");
600}
601
/* Test driver: read format strings from standard input, one per line, and
   for each print the parsed argument list on its own line.  For an invalid
   string, the line "INVALID" is followed by a line with the reason.
   Stops at end of input or on a read error.  Always returns 0.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
	{
	  /* getline may have allocated a buffer even though it failed.  */
	  free (line);
	  break;
	}
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
	line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
	printf ("%s\n", invalid_reason);
      else
	/* Release the parse result; it is not needed past this point.  */
	format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
633
634/*
635 * For Emacs M-x compile
636 * Local Variables:
637 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-awk.c ../lib/libgettextlib.la"
638 * End:
639 */
640
641#endif /* TEST */
642