1/* C format strings.
2   Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23#include <stdbool.h>
24#include <stdlib.h>
25
26#include "format.h"
27#include "c-ctype.h"
28#include "xalloc.h"
29#include "xvasprintf.h"
30#include "format-invalid.h"
31#include "gettext.h"
32
33#define _(str) gettext (str)
34
/* C format strings are described in POSIX (IEEE P1003.1 2001), section
   XSH 3 fprintf().  See also Linux fprintf(3) manual page.
   A directive
   - starts with '%' or '%m$' where m is a positive integer,
   - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
     "'", or - only in msgstr strings - the string "I", each of which acts as
     a flag,
   - is optionally followed by a width specification: '*' (reads an argument)
     or '*m$' or a nonempty digit sequence,
   - is optionally followed by '.' and a precision specification: '*' (reads
     an argument) or '*m$' or a possibly empty digit sequence,
   - is either continued like this:
       - is optionally followed by a size specifier, one of 'hh' 'h' 'l' 'll'
         'L' 'q' 'j' 'z' 'Z' 't',
       - is finished by a specifier
           - '%', that needs no argument,
           - 'm' (a glibc extension), that needs no argument,
           - 'c', 'C', that need a character argument,
           - 's', 'S', that need a string argument,
           - 'i', 'd', that need a signed integer argument,
           - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
           - 'e', 'E', 'f', 'F', 'g', 'G', 'a', 'A', that need a floating-point
             argument,
           - 'p', that needs a 'void *' argument,
           - 'n', that needs a pointer to integer,
           - '@' - only in Objective C format strings - that needs an
             Objective C object argument.
     or is finished by a specifier '<' inttypes-macro '>' where inttypes-macro
     is an ISO C 99 section 7.8.1 format directive.
   Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
   be used in the same string.  When numbered argument specifications are
   used, specifying the Nth argument requires that all the leading arguments,
   from the first to the (N-1)th, are specified in the format string.
 */
66
/* Type of an argument consumed by a format directive.
   A value is one basic type (the low three bits), optionally combined with
   the FAT_UNSIGNED flag and with at most one of the FAT_SIZE_* flags.  */
enum format_arg_type
{
  FAT_NONE              = 0,

  /* Basic types: stored in bits 0..2.  */
  FAT_INTEGER           = 1,
  FAT_DOUBLE            = 2,
  FAT_CHAR              = 3,
  FAT_STRING            = 4,
  FAT_OBJC_OBJECT       = 5,
  FAT_POINTER           = 6,
  FAT_COUNT_POINTER     = 7,

  /* Signedness flag: bit 3.  */
  FAT_UNSIGNED          = 0x000008,

  /* Size flags: one bit each, bits 4..23.  */
  FAT_SIZE_SHORT        = 0x000010,
  FAT_SIZE_CHAR         = 0x000020,
  FAT_SIZE_LONG         = 0x000040,
  FAT_SIZE_LONGLONG     = 0x000080,
  FAT_SIZE_8_T          = 0x000100,
  FAT_SIZE_16_T         = 0x000200,
  FAT_SIZE_32_T         = 0x000400,
  FAT_SIZE_64_T         = 0x000800,
  FAT_SIZE_LEAST8_T     = 0x001000,
  FAT_SIZE_LEAST16_T    = 0x002000,
  FAT_SIZE_LEAST32_T    = 0x004000,
  FAT_SIZE_LEAST64_T    = 0x008000,
  FAT_SIZE_FAST8_T      = 0x010000,
  FAT_SIZE_FAST16_T     = 0x020000,
  FAT_SIZE_FAST32_T     = 0x040000,
  FAT_SIZE_FAST64_T     = 0x080000,
  FAT_SIZE_INTMAX_T     = 0x100000,
  FAT_SIZE_INTPTR_T     = 0x200000,
  FAT_SIZE_SIZE_T       = 0x400000,
  FAT_SIZE_PTRDIFF_T    = 0x800000,

  /* Wide characters and wide strings reuse the 'long' size flag.  */
  FAT_WIDE              = FAT_SIZE_LONG,

  /* How C types map to combinations of the values above:
       signed char              FAT_INTEGER | FAT_SIZE_CHAR
       unsigned char            FAT_INTEGER | FAT_SIZE_CHAR | FAT_UNSIGNED
       short                    FAT_INTEGER | FAT_SIZE_SHORT
       unsigned short           FAT_INTEGER | FAT_SIZE_SHORT | FAT_UNSIGNED
       int                      FAT_INTEGER
       unsigned int             FAT_INTEGER | FAT_UNSIGNED
       long int                 FAT_INTEGER | FAT_SIZE_LONG
       unsigned long int        FAT_INTEGER | FAT_SIZE_LONG | FAT_UNSIGNED
       long long int            FAT_INTEGER | FAT_SIZE_LONGLONG
       unsigned long long int   FAT_INTEGER | FAT_SIZE_LONGLONG | FAT_UNSIGNED
       double                   FAT_DOUBLE
       long double              FAT_DOUBLE | FAT_SIZE_LONGLONG
       char / int               FAT_CHAR
       wchar_t / wint_t         FAT_CHAR | FAT_SIZE_LONG
       const char *             FAT_STRING
       const wchar_t *          FAT_STRING | FAT_SIZE_LONG
       void *                   FAT_POINTER
       signed char * (for %n)   FAT_COUNT_POINTER | FAT_SIZE_CHAR
       short * (for %n)         FAT_COUNT_POINTER | FAT_SIZE_SHORT
       int * (for %n)           FAT_COUNT_POINTER
       long int * (for %n)      FAT_COUNT_POINTER | FAT_SIZE_LONG
       long long int * (for %n) FAT_COUNT_POINTER | FAT_SIZE_LONGLONG  */

  /* Union of all size flags.  */
  FAT_SIZE_MASK         = (FAT_SIZE_SHORT | FAT_SIZE_CHAR
                           | FAT_SIZE_LONG | FAT_SIZE_LONGLONG
                           | FAT_SIZE_8_T | FAT_SIZE_16_T
                           | FAT_SIZE_32_T | FAT_SIZE_64_T
                           | FAT_SIZE_LEAST8_T | FAT_SIZE_LEAST16_T
                           | FAT_SIZE_LEAST32_T | FAT_SIZE_LEAST64_T
                           | FAT_SIZE_FAST8_T | FAT_SIZE_FAST16_T
                           | FAT_SIZE_FAST32_T | FAT_SIZE_FAST64_T
                           | FAT_SIZE_INTMAX_T | FAT_SIZE_INTPTR_T
                           | FAT_SIZE_SIZE_T | FAT_SIZE_PTRDIFF_T)
};
137
/* An argument referenced through an explicit position ('%m$' or '*m$').
   Only used temporarily while parsing; a successfully parsed list is
   converted into a sequence of 'struct unnumbered_arg'.  */
struct numbered_arg
{
  /* The position m; the parser rejects 0, so valid values start at 1.  */
  unsigned int number;
  /* The type that the directive expects for this argument.  */
  enum format_arg_type type;
};
143
/* An argument identified only by its position in the argument sequence.  */
struct unnumbered_arg
{
  /* The type that the directive expects for this argument.  */
  enum format_arg_type type;
};
148
/* Result of parsing one format string.  */
struct spec
{
  /* Number of '%' directives encountered, including those that consume no
     argument.  */
  unsigned int directives;
  /* Number of valid elements in the 'unnumbered' array.  */
  unsigned int unnumbered_arg_count;
  /* Number of elements for which array space has been allocated.  During
     parsing this capacity is also used for the temporary array of numbered
     arguments.  */
  unsigned int allocated;
  /* The argument types, in argument order; NULL if nothing was allocated.  */
  struct unnumbered_arg *unnumbered;
  /* True if the string contains a '%' conversion whose final '%' does not
     immediately follow the initial one, such as "%2%".  */
  bool unlikely_intentional;
  /* Number of system dependent parts found: 'I' flags and '<...>'
     specifiers.  */
  unsigned int sysdep_directives_count;
  /* For each such part, two consecutive pointers into the parsed string:
     its start and the position just after its end.  */
  const char **sysdep_directives;
};
159
/* Decimal digit test that does not depend on the locale.
   Unlike the <ctype.h> isdigit, whose argument must be representable as an
   'unsigned char', this one accepts a plain 'char' as well: the subtraction
   is carried out in unsigned arithmetic, so every value outside '0'..'9'
   ends up above 9.  */
#undef isdigit
#define isdigit(c) ((unsigned int) (c) - (unsigned int) '0' <= 9U)
165
166
167static int
168numbered_arg_compare (const void *p1, const void *p2)
169{
170  unsigned int n1 = ((const struct numbered_arg *) p1)->number;
171  unsigned int n2 = ((const struct numbered_arg *) p2)->number;
172
173  return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
174}
175
/* Expands to an xasprintf call that builds the (translated) error message
   for a '<...>' specifier whose contents are not a recognized format
   specifier macro name.  DIRECTIVE_NUMBER is the number of the offending
   directive within the format string.  */
#define INVALID_C99_MACRO(directive_number) \
  xasprintf (_("In the directive number %u, the token after '<' is not the name of a format specifier macro. The valid macro names are listed in ISO C 99 section 7.8.1."), directive_number)
178
179static void *
180format_parse (const char *format, bool translated, bool objc_extensions,
181	      char **invalid_reason)
182{
183  struct spec spec;
184  unsigned int numbered_arg_count;
185  struct numbered_arg *numbered;
186  struct spec *result;
187
188  spec.directives = 0;
189  numbered_arg_count = 0;
190  spec.unnumbered_arg_count = 0;
191  spec.allocated = 0;
192  numbered = NULL;
193  spec.unnumbered = NULL;
194  spec.unlikely_intentional = false;
195  spec.sysdep_directives_count = 0;
196  spec.sysdep_directives = NULL;
197
198  for (; *format != '\0';)
199    if (*format++ == '%')
200      {
201	/* A directive.  */
202	unsigned int number = 0;
203	enum format_arg_type type;
204	enum format_arg_type size;
205
206	spec.directives++;
207
208	if (isdigit (*format))
209	  {
210	    const char *f = format;
211	    unsigned int m = 0;
212
213	    do
214	      {
215		m = 10 * m + (*f - '0');
216		f++;
217	      }
218	    while (isdigit (*f));
219
220	    if (*f == '$')
221	      {
222		if (m == 0)
223		  {
224		    *invalid_reason = INVALID_ARGNO_0 (spec.directives);
225		    goto bad_format;
226		  }
227		number = m;
228		format = ++f;
229	      }
230	  }
231
232	/* Parse flags.  */
233	for (;;)
234	  {
235	    if (*format == ' ' || *format == '+' || *format == '-'
236		|| *format == '#' || *format == '0' || *format == '\'')
237	      format++;
238	    else if (translated && *format == 'I')
239	      {
240		spec.sysdep_directives =
241		  (const char **)
242		  xrealloc (spec.sysdep_directives,
243			    2 * (spec.sysdep_directives_count + 1)
244			    * sizeof (const char *));
245		spec.sysdep_directives[2 * spec.sysdep_directives_count] = format;
246		spec.sysdep_directives[2 * spec.sysdep_directives_count + 1] = format + 1;
247		spec.sysdep_directives_count++;
248		format++;
249	      }
250	    else
251	      break;
252	  }
253
254	/* Parse width.  */
255	if (*format == '*')
256	  {
257	    unsigned int width_number = 0;
258
259	    format++;
260
261	    if (isdigit (*format))
262	      {
263		const char *f = format;
264		unsigned int m = 0;
265
266		do
267		  {
268		    m = 10 * m + (*f - '0');
269		    f++;
270		  }
271		while (isdigit (*f));
272
273		if (*f == '$')
274		  {
275		    if (m == 0)
276		      {
277			*invalid_reason =
278			  INVALID_WIDTH_ARGNO_0 (spec.directives);
279			goto bad_format;
280		      }
281		    width_number = m;
282		    format = ++f;
283		  }
284	      }
285
286	    if (width_number)
287	      {
288		/* Numbered argument.  */
289
290		/* Numbered and unnumbered specifications are exclusive.  */
291		if (spec.unnumbered_arg_count > 0)
292		  {
293		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
294		    goto bad_format;
295		  }
296
297		if (spec.allocated == numbered_arg_count)
298		  {
299		    spec.allocated = 2 * spec.allocated + 1;
300		    numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
301		  }
302		numbered[numbered_arg_count].number = width_number;
303		numbered[numbered_arg_count].type = FAT_INTEGER;
304		numbered_arg_count++;
305	      }
306	    else
307	      {
308		/* Unnumbered argument.  */
309
310		/* Numbered and unnumbered specifications are exclusive.  */
311		if (numbered_arg_count > 0)
312		  {
313		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
314		    goto bad_format;
315		  }
316
317		if (spec.allocated == spec.unnumbered_arg_count)
318		  {
319		    spec.allocated = 2 * spec.allocated + 1;
320		    spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
321		  }
322		spec.unnumbered[spec.unnumbered_arg_count].type = FAT_INTEGER;
323		spec.unnumbered_arg_count++;
324	      }
325	  }
326	else if (isdigit (*format))
327	  {
328	    do format++; while (isdigit (*format));
329	  }
330
331	/* Parse precision.  */
332	if (*format == '.')
333	  {
334	    format++;
335
336	    if (*format == '*')
337	      {
338		unsigned int precision_number = 0;
339
340		format++;
341
342		if (isdigit (*format))
343		  {
344		    const char *f = format;
345		    unsigned int m = 0;
346
347		    do
348		      {
349			m = 10 * m + (*f - '0');
350			f++;
351		      }
352		    while (isdigit (*f));
353
354		    if (*f == '$')
355		      {
356			if (m == 0)
357			  {
358			    *invalid_reason =
359			      INVALID_PRECISION_ARGNO_0 (spec.directives);
360			    goto bad_format;
361			  }
362			precision_number = m;
363			format = ++f;
364		      }
365		  }
366
367		if (precision_number)
368		  {
369		    /* Numbered argument.  */
370
371		    /* Numbered and unnumbered specifications are exclusive.  */
372		    if (spec.unnumbered_arg_count > 0)
373		      {
374			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
375			goto bad_format;
376		      }
377
378		    if (spec.allocated == numbered_arg_count)
379		      {
380			spec.allocated = 2 * spec.allocated + 1;
381			numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
382		      }
383		    numbered[numbered_arg_count].number = precision_number;
384		    numbered[numbered_arg_count].type = FAT_INTEGER;
385		    numbered_arg_count++;
386		  }
387		else
388		  {
389		    /* Unnumbered argument.  */
390
391		    /* Numbered and unnumbered specifications are exclusive.  */
392		    if (numbered_arg_count > 0)
393		      {
394			*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
395			goto bad_format;
396		      }
397
398		    if (spec.allocated == spec.unnumbered_arg_count)
399		      {
400			spec.allocated = 2 * spec.allocated + 1;
401			spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
402		      }
403		    spec.unnumbered[spec.unnumbered_arg_count].type = FAT_INTEGER;
404		    spec.unnumbered_arg_count++;
405		  }
406	      }
407	    else if (isdigit (*format))
408	      {
409		do format++; while (isdigit (*format));
410	      }
411	  }
412
413	if (*format == '<')
414	  {
415	    spec.sysdep_directives =
416	      (const char **)
417	      xrealloc (spec.sysdep_directives,
418			2 * (spec.sysdep_directives_count + 1)
419			* sizeof (const char *));
420	    spec.sysdep_directives[2 * spec.sysdep_directives_count] = format;
421
422	    format++;
423	    /* Parse ISO C 99 section 7.8.1 format string directive.
424	       Syntax:
425	       P R I { d | i | o | u | x | X }
426	       { { | LEAST | FAST } { 8 | 16 | 32 | 64 } | MAX | PTR }  */
427	    if (*format != 'P')
428	      {
429		*invalid_reason = INVALID_C99_MACRO (spec.directives);
430		goto bad_format;
431	      }
432	    format++;
433	    if (*format != 'R')
434	      {
435		*invalid_reason = INVALID_C99_MACRO (spec.directives);
436		goto bad_format;
437	      }
438	    format++;
439	    if (*format != 'I')
440	      {
441		*invalid_reason = INVALID_C99_MACRO (spec.directives);
442		goto bad_format;
443	      }
444	    format++;
445
446	    switch (*format)
447	      {
448	      case 'i': case 'd':
449		type = FAT_INTEGER;
450		break;
451	      case 'u': case 'o': case 'x': case 'X':
452		type = FAT_INTEGER | FAT_UNSIGNED;
453		break;
454	      default:
455		*invalid_reason = INVALID_C99_MACRO (spec.directives);
456		goto bad_format;
457	      }
458	    format++;
459
460	    if (format[0] == 'M' && format[1] == 'A' && format[2] == 'X')
461	      {
462		type |= FAT_SIZE_INTMAX_T;
463		format += 3;
464	      }
465	    else if (format[0] == 'P' && format[1] == 'T' && format[2] == 'R')
466	      {
467		type |= FAT_SIZE_INTPTR_T;
468		format += 3;
469	      }
470	    else
471	      {
472		if (format[0] == 'L' && format[1] == 'E' && format[2] == 'A'
473		    && format[3] == 'S' && format[4] == 'T')
474		  {
475		    format += 5;
476		    if (format[0] == '8')
477		      {
478			type |= FAT_SIZE_LEAST8_T;
479			format++;
480		      }
481		    else if (format[0] == '1' && format[1] == '6')
482		      {
483			type |= FAT_SIZE_LEAST16_T;
484			format += 2;
485		      }
486		    else if (format[0] == '3' && format[1] == '2')
487		      {
488			type |= FAT_SIZE_LEAST32_T;
489			format += 2;
490		      }
491		    else if (format[0] == '6' && format[1] == '4')
492		      {
493			type |= FAT_SIZE_LEAST64_T;
494			format += 2;
495		      }
496		    else
497		      {
498			*invalid_reason = INVALID_C99_MACRO (spec.directives);
499			goto bad_format;
500		      }
501		  }
502		else if (format[0] == 'F' && format[1] == 'A'
503			 && format[2] == 'S' && format[3] == 'T')
504		  {
505		    format += 4;
506		    if (format[0] == '8')
507		      {
508			type |= FAT_SIZE_FAST8_T;
509			format++;
510		      }
511		    else if (format[0] == '1' && format[1] == '6')
512		      {
513			type |= FAT_SIZE_FAST16_T;
514			format += 2;
515		      }
516		    else if (format[0] == '3' && format[1] == '2')
517		      {
518			type |= FAT_SIZE_FAST32_T;
519			format += 2;
520		      }
521		    else if (format[0] == '6' && format[1] == '4')
522		      {
523			type |= FAT_SIZE_FAST64_T;
524			format += 2;
525		      }
526		    else
527		      {
528			*invalid_reason = INVALID_C99_MACRO (spec.directives);
529			goto bad_format;
530		      }
531		  }
532		else
533		  {
534		    if (format[0] == '8')
535		      {
536			type |= FAT_SIZE_8_T;
537			format++;
538		      }
539		    else if (format[0] == '1' && format[1] == '6')
540		      {
541			type |= FAT_SIZE_16_T;
542			format += 2;
543		      }
544		    else if (format[0] == '3' && format[1] == '2')
545		      {
546			type |= FAT_SIZE_32_T;
547			format += 2;
548		      }
549		    else if (format[0] == '6' && format[1] == '4')
550		      {
551			type |= FAT_SIZE_64_T;
552			format += 2;
553		      }
554		    else
555		      {
556			*invalid_reason = INVALID_C99_MACRO (spec.directives);
557			goto bad_format;
558		      }
559		  }
560	      }
561
562	    if (*format != '>')
563	      {
564		*invalid_reason =
565		  xasprintf (_("In the directive number %u, the token after '<' is not followed by '>'."), spec.directives);
566		goto bad_format;
567	      }
568
569	    spec.sysdep_directives[2 * spec.sysdep_directives_count + 1] = format + 1;
570	    spec.sysdep_directives_count++;
571	  }
572	else
573	  {
574	    /* Parse size.  */
575	    size = 0;
576	    for (;; format++)
577	      {
578		if (*format == 'h')
579		  {
580		    if (size & (FAT_SIZE_SHORT | FAT_SIZE_CHAR))
581		      size = FAT_SIZE_CHAR;
582		    else
583		      size = FAT_SIZE_SHORT;
584		  }
585		else if (*format == 'l')
586		  {
587		    if (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG))
588		      size = FAT_SIZE_LONGLONG;
589		    else
590		      size = FAT_SIZE_LONG;
591		  }
592		else if (*format == 'L')
593		  size = FAT_SIZE_LONGLONG;
594		else if (*format == 'q')
595		  /* Old BSD 4.4 convention.  */
596		  size = FAT_SIZE_LONGLONG;
597		else if (*format == 'j')
598		  size = FAT_SIZE_INTMAX_T;
599		else if (*format == 'z' || *format == 'Z')
600		  /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
601		     because the warning facility in gcc-2.95.2 understands
602		     only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784).  */
603		  size = FAT_SIZE_SIZE_T;
604		else if (*format == 't')
605		  size = FAT_SIZE_PTRDIFF_T;
606		else
607		  break;
608	      }
609
610	    switch (*format)
611	      {
612	      case '%':
613		/* Programmers writing _("%2%") most often will not want to
614		   use this string as a c-format string, but rather as a
615		   literal or as a different kind of format string.  */
616		if (format[-1] != '%')
617		  spec.unlikely_intentional = true;
618		type = FAT_NONE;
619		break;
620	      case 'm': /* glibc extension */
621		type = FAT_NONE;
622		break;
623	      case 'c':
624		type = FAT_CHAR;
625		type |= (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG)
626			 ? FAT_WIDE : 0);
627		break;
628	      case 'C': /* obsolete */
629		type = FAT_CHAR | FAT_WIDE;
630		break;
631	      case 's':
632		type = FAT_STRING;
633		type |= (size & (FAT_SIZE_LONG | FAT_SIZE_LONGLONG)
634			 ? FAT_WIDE : 0);
635		break;
636	      case 'S': /* obsolete */
637		type = FAT_STRING | FAT_WIDE;
638		break;
639	      case 'i': case 'd':
640		type = FAT_INTEGER;
641		type |= (size & FAT_SIZE_MASK);
642		break;
643	      case 'u': case 'o': case 'x': case 'X':
644		type = FAT_INTEGER | FAT_UNSIGNED;
645		type |= (size & FAT_SIZE_MASK);
646		break;
647	      case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
648	      case 'a': case 'A':
649		type = FAT_DOUBLE;
650		type |= (size & FAT_SIZE_LONGLONG);
651		break;
652	      case '@':
653		if (objc_extensions)
654		  {
655		    type = FAT_OBJC_OBJECT;
656		    break;
657		  }
658		goto other;
659	      case 'p':
660		type = FAT_POINTER;
661		break;
662	      case 'n':
663		type = FAT_COUNT_POINTER;
664		type |= (size & FAT_SIZE_MASK);
665		break;
666	      other:
667	      default:
668		*invalid_reason =
669		  (*format == '\0'
670		   ? INVALID_UNTERMINATED_DIRECTIVE ()
671		   : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
672		goto bad_format;
673	      }
674	  }
675
676	if (type != FAT_NONE)
677	  {
678	    if (number)
679	      {
680		/* Numbered argument.  */
681
682		/* Numbered and unnumbered specifications are exclusive.  */
683		if (spec.unnumbered_arg_count > 0)
684		  {
685		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
686		    goto bad_format;
687		  }
688
689		if (spec.allocated == numbered_arg_count)
690		  {
691		    spec.allocated = 2 * spec.allocated + 1;
692		    numbered = (struct numbered_arg *) xrealloc (numbered, spec.allocated * sizeof (struct numbered_arg));
693		  }
694		numbered[numbered_arg_count].number = number;
695		numbered[numbered_arg_count].type = type;
696		numbered_arg_count++;
697	      }
698	    else
699	      {
700		/* Unnumbered argument.  */
701
702		/* Numbered and unnumbered specifications are exclusive.  */
703		if (numbered_arg_count > 0)
704		  {
705		    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
706		    goto bad_format;
707		  }
708
709		if (spec.allocated == spec.unnumbered_arg_count)
710		  {
711		    spec.allocated = 2 * spec.allocated + 1;
712		    spec.unnumbered = (struct unnumbered_arg *) xrealloc (spec.unnumbered, spec.allocated * sizeof (struct unnumbered_arg));
713		  }
714		spec.unnumbered[spec.unnumbered_arg_count].type = type;
715		spec.unnumbered_arg_count++;
716	      }
717	  }
718
719	format++;
720      }
721
722  /* Sort the numbered argument array, and eliminate duplicates.  */
723  if (numbered_arg_count > 1)
724    {
725      unsigned int i, j;
726      bool err;
727
728      qsort (numbered, numbered_arg_count,
729	     sizeof (struct numbered_arg), numbered_arg_compare);
730
731      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
732      err = false;
733      for (i = j = 0; i < numbered_arg_count; i++)
734	if (j > 0 && numbered[i].number == numbered[j-1].number)
735	  {
736	    enum format_arg_type type1 = numbered[i].type;
737	    enum format_arg_type type2 = numbered[j-1].type;
738	    enum format_arg_type type_both;
739
740	    if (type1 == type2)
741	      type_both = type1;
742	    else
743	      {
744		/* Incompatible types.  */
745		type_both = FAT_NONE;
746		if (!err)
747		  *invalid_reason =
748		    INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
749		err = true;
750	      }
751
752	    numbered[j-1].type = type_both;
753	  }
754	else
755	  {
756	    if (j < i)
757	      {
758		numbered[j].number = numbered[i].number;
759		numbered[j].type = numbered[i].type;
760	      }
761	    j++;
762	  }
763      numbered_arg_count = j;
764      if (err)
765	/* *invalid_reason has already been set above.  */
766	goto bad_format;
767    }
768
769  /* Verify that the format strings uses all arguments up to the highest
770     numbered one.  */
771  if (numbered_arg_count > 0)
772    {
773      unsigned int i;
774
775      for (i = 0; i < numbered_arg_count; i++)
776	if (numbered[i].number != i + 1)
777	  {
778	    *invalid_reason =
779	      xasprintf (_("The string refers to argument number %u but ignores argument number %u."), numbered[i].number, i + 1);
780	    goto bad_format;
781	  }
782
783      /* So now the numbered arguments array is equivalent to a sequence
784	 of unnumbered arguments.  */
785      spec.unnumbered_arg_count = numbered_arg_count;
786      spec.allocated = spec.unnumbered_arg_count;
787      spec.unnumbered = (struct unnumbered_arg *) xmalloc (spec.allocated * sizeof (struct unnumbered_arg));
788      for (i = 0; i < spec.unnumbered_arg_count; i++)
789	spec.unnumbered[i].type = numbered[i].type;
790      free (numbered);
791      numbered_arg_count = 0;
792    }
793
794  result = (struct spec *) xmalloc (sizeof (struct spec));
795  *result = spec;
796  return result;
797
798 bad_format:
799  if (numbered != NULL)
800    free (numbered);
801  if (spec.unnumbered != NULL)
802    free (spec.unnumbered);
803  if (spec.sysdep_directives != NULL)
804    free (spec.sysdep_directives);
805  return NULL;
806}
807
/* Parses FORMAT as a C format string, i.e. with the Objective C extensions
   turned off.  Arguments and return value are those of format_parse.  */
static void *
format_c_parse (const char *format, bool translated, char **invalid_reason)
{
  const bool objc_extensions = false;

  return format_parse (format, translated, objc_extensions, invalid_reason);
}
813
/* Parses FORMAT as an Objective C format string, i.e. with the Objective C
   extensions turned on.  Arguments and return value are those of
   format_parse.  */
static void *
format_objc_parse (const char *format, bool translated, char **invalid_reason)
{
  const bool objc_extensions = true;

  return format_parse (format, translated, objc_extensions, invalid_reason);
}
819
820static void
821format_free (void *descr)
822{
823  struct spec *spec = (struct spec *) descr;
824
825  if (spec->unnumbered != NULL)
826    free (spec->unnumbered);
827  if (spec->sysdep_directives != NULL)
828    free (spec->sysdep_directives);
829  free (spec);
830}
831
832static bool
833format_is_unlikely_intentional (void *descr)
834{
835  struct spec *spec = (struct spec *) descr;
836
837  return spec->unlikely_intentional;
838}
839
840static int
841format_get_number_of_directives (void *descr)
842{
843  struct spec *spec = (struct spec *) descr;
844
845  return spec->directives;
846}
847
848static bool
849format_check (void *msgid_descr, void *msgstr_descr, bool equality,
850	      formatstring_error_logger_t error_logger,
851	      const char *pretty_msgstr)
852{
853  struct spec *spec1 = (struct spec *) msgid_descr;
854  struct spec *spec2 = (struct spec *) msgstr_descr;
855  bool err = false;
856  unsigned int i;
857
858  /* Check the argument types are the same.  */
859  if (equality
860      ? spec1->unnumbered_arg_count != spec2->unnumbered_arg_count
861      : spec1->unnumbered_arg_count < spec2->unnumbered_arg_count)
862    {
863      if (error_logger)
864	error_logger (_("number of format specifications in 'msgid' and '%s' does not match"),
865		      pretty_msgstr);
866      err = true;
867    }
868  else
869    for (i = 0; i < spec2->unnumbered_arg_count; i++)
870      if (spec1->unnumbered[i].type != spec2->unnumbered[i].type)
871	{
872	  if (error_logger)
873	    error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
874			  pretty_msgstr, i + 1);
875	  err = true;
876	}
877
878  return err;
879}
880
881
/* The set of functions that handle C format strings.  It differs from
   formatstring_objc only in its parse function, which does not accept the
   Objective C extensions.  */
struct formatstring_parser formatstring_c =
{
  format_c_parse,
  format_free,
  format_get_number_of_directives,
  format_is_unlikely_intentional,
  format_check
};
890
891
/* The set of functions that handle Objective C format strings.  It differs
   from formatstring_c only in its parse function, which accepts the
   Objective C extensions.  */
struct formatstring_parser formatstring_objc =
{
  format_objc_parse,
  format_free,
  format_get_number_of_directives,
  format_is_unlikely_intentional,
  format_check
};
900
901
902void
903get_sysdep_c_format_directives (const char *string, bool translated,
904				struct interval **intervalsp, size_t *lengthp)
905{
906  /* Parse the format string with all possible extensions turned on.  (The
907     caller has already verified that the format string is valid for the
908     particular language.)  */
909  char *invalid_reason = NULL;
910  struct spec *descr =
911    (struct spec *) format_parse (string, translated, true, &invalid_reason);
912
913  if (descr != NULL && descr->sysdep_directives_count > 0)
914    {
915      unsigned int n = descr->sysdep_directives_count;
916      struct interval *intervals =
917	(struct interval *) xmalloc (n * sizeof (struct interval));
918      unsigned int i;
919
920      for (i = 0; i < n; i++)
921	{
922	  intervals[i].startpos = descr->sysdep_directives[2 * i] - string;
923	  intervals[i].endpos = descr->sysdep_directives[2 * i + 1] - string;
924	}
925      *intervalsp = intervals;
926      *lengthp = n;
927    }
928  else
929    {
930      *intervalsp = NULL;
931      *lengthp = 0;
932    }
933
934  if (descr != NULL)
935    format_free (descr);
936  else
937    free (invalid_reason);
938}
939
940
941#ifdef TEST
942
943/* Test program: Print the argument list specification returned by
944   format_parse for strings read from standard input.  */
945
946#include <stdio.h>
947#include "getline.h"
948
949static void
950format_print (void *descr)
951{
952  struct spec *spec = (struct spec *) descr;
953  unsigned int i;
954
955  if (spec == NULL)
956    {
957      printf ("INVALID");
958      return;
959    }
960
961  printf ("(");
962  for (i = 0; i < spec->unnumbered_arg_count; i++)
963    {
964      if (i > 0)
965	printf (" ");
966      if (spec->unnumbered[i].type & FAT_UNSIGNED)
967	printf ("[unsigned]");
968      switch (spec->unnumbered[i].type & FAT_SIZE_MASK)
969	{
970	case 0:
971	  break;
972	case FAT_SIZE_SHORT:
973	  printf ("[short]");
974	  break;
975	case FAT_SIZE_CHAR:
976	  printf ("[char]");
977	  break;
978	case FAT_SIZE_LONG:
979	  printf ("[long]");
980	  break;
981	case FAT_SIZE_LONGLONG:
982	  printf ("[long long]");
983	  break;
984	case FAT_SIZE_8_T:
985	  printf ("[int8_t]");
986	  break;
987	case FAT_SIZE_16_T:
988	  printf ("[int16_t]");
989	  break;
990	case FAT_SIZE_32_T:
991	  printf ("[int32_t]");
992	  break;
993	case FAT_SIZE_64_T:
994	  printf ("[int64_t]");
995	  break;
996	case FAT_SIZE_LEAST8_T:
997	  printf ("[int_least8_t]");
998	  break;
999	case FAT_SIZE_LEAST16_T:
1000	  printf ("[int_least16_t]");
1001	  break;
1002	case FAT_SIZE_LEAST32_T:
1003	  printf ("[int_least32_t]");
1004	  break;
1005	case FAT_SIZE_LEAST64_T:
1006	  printf ("[int_least64_t]");
1007	  break;
1008	case FAT_SIZE_FAST8_T:
1009	  printf ("[int_fast8_t]");
1010	  break;
1011	case FAT_SIZE_FAST16_T:
1012	  printf ("[int_fast16_t]");
1013	  break;
1014	case FAT_SIZE_FAST32_T:
1015	  printf ("[int_fast32_t]");
1016	  break;
1017	case FAT_SIZE_FAST64_T:
1018	  printf ("[int_fast64_t]");
1019	  break;
1020	case FAT_SIZE_INTMAX_T:
1021	  printf ("[intmax_t]");
1022	  break;
1023	case FAT_SIZE_INTPTR_T:
1024	  printf ("[intptr_t]");
1025	  break;
1026	case FAT_SIZE_SIZE_T:
1027	  printf ("[size_t]");
1028	  break;
1029	case FAT_SIZE_PTRDIFF_T:
1030	  printf ("[ptrdiff_t]");
1031	  break;
1032	default:
1033	  abort ();
1034	}
1035      switch (spec->unnumbered[i].type & ~(FAT_UNSIGNED | FAT_SIZE_MASK))
1036	{
1037	case FAT_INTEGER:
1038	  printf ("i");
1039	  break;
1040	case FAT_DOUBLE:
1041	  printf ("f");
1042	  break;
1043	case FAT_CHAR:
1044	  printf ("c");
1045	  break;
1046	case FAT_STRING:
1047	  printf ("s");
1048	  break;
1049	case FAT_OBJC_OBJECT:
1050	  printf ("@");
1051	  break;
1052	case FAT_POINTER:
1053	  printf ("p");
1054	  break;
1055	case FAT_COUNT_POINTER:
1056	  printf ("n");
1057	  break;
1058	default:
1059	  abort ();
1060	}
1061    }
1062  printf (")");
1063}
1064
1065int
1066main ()
1067{
1068  for (;;)
1069    {
1070      char *line = NULL;
1071      size_t line_size = 0;
1072      int line_len;
1073      char *invalid_reason;
1074      void *descr;
1075
1076      line_len = getline (&line, &line_size, stdin);
1077      if (line_len < 0)
1078	break;
1079      if (line_len > 0 && line[line_len - 1] == '\n')
1080	line[--line_len] = '\0';
1081
1082      invalid_reason = NULL;
1083      descr = format_c_parse (line, false, &invalid_reason);
1084
1085      format_print (descr);
1086      printf ("\n");
1087      if (descr == NULL)
1088	printf ("%s\n", invalid_reason);
1089
1090      free (invalid_reason);
1091      free (line);
1092    }
1093
1094  return 0;
1095}
1096
1097/*
1098 * For Emacs M-x compile
1099 * Local Variables:
1100 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-c.c ../lib/libgettextlib.la"
1101 * End:
1102 */
1103
1104#endif /* TEST */
1105