1/* Python format strings.
2   Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
3   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5   This program is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program; if not, write to the Free Software Foundation,
17   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23#include <stdbool.h>
24#include <stdlib.h>
25#include <string.h>
26
27#include "format.h"
28#include "c-ctype.h"
29#include "xalloc.h"
30#include "xvasprintf.h"
31#include "format-invalid.h"
32#include "gettext.h"
33
34#define _(str) gettext (str)
35
36/* Python format strings are described in
37     Python Library reference
38     2. Built-in Types, Exceptions and Functions
39     2.1. Built-in Types
40     2.1.5. Sequence Types
41     2.1.5.2. String Formatting Operations
42   Any string or Unicode string can act as format string via the '%' operator,
43   implemented in stringobject.c and unicodeobject.c.
44   A directive
45   - starts with '%'
46   - is optionally followed by '(ident)' where ident is any sequence of
47     characters with balanced left and right parentheses,
48   - is optionally followed by any of the characters '-' (left justification),
49     '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
50     flag,
51   - is optionally followed by a width specification: '*' (reads an argument)
52     or a nonempty digit sequence,
53   - is optionally followed by '.' and a precision specification: '*' (reads
54     an argument) or a nonempty digit sequence,
   - is optionally followed by a size specifier, one of 'h' 'l' 'L',
56   - is finished by a specifier
57       - '%', that needs no argument,
58       - 'c', that needs a character argument,
59       - 's', 'r', that need a string argument,
60       - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
61       - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
62   Use of '(ident)' and use of unnamed argument specifications are exclusive,
63   because the first requires a mapping as argument, while the second requires
64   a tuple as argument.
65 */
66
/* Kind of argument a directive consumes.  */
enum format_arg_type
{
  FAT_NONE,       /* Marks a named argument that was used in incompatible ways.  */
  FAT_ANY,        /* '%' specifier: compatible with any other type.  */
  FAT_CHARACTER,  /* 'c' specifier.  */
  FAT_STRING,     /* 's', 'r' specifiers.  */
  FAT_INTEGER,    /* 'i', 'd', 'u', 'o', 'x', 'X' specifiers, and '*' width or
                     precision.  */
  FAT_FLOAT       /* 'e', 'E', 'f', 'g', 'G' specifiers.  */
};
76
77struct named_arg
78{
79  char *name;
80  enum format_arg_type type;
81};
82
83struct unnamed_arg
84{
85  enum format_arg_type type;
86};
87
/* Result of parsing one Python format string.  */
struct spec
{
  unsigned int directives;         /* Number of '%' directives seen.  */
  unsigned int named_arg_count;    /* Number of used entries in 'named'.  */
  unsigned int unnamed_arg_count;  /* Number of used entries in 'unnamed'.  */
  unsigned int allocated;          /* Capacity of whichever array is in use;
                                      the parser never fills both.  */
  struct named_arg *named;         /* Named arguments, or NULL.  */
  struct unnamed_arg *unnamed;     /* Unnamed arguments, or NULL.  */
};
97
/* Locale independent test for a decimal digit.
   The argument may be a 'char' or an 'unsigned char' (unlike <ctype.h>
   isdigit, whose argument must be an 'unsigned char').  Characters below
   '0' wrap around to large unsigned values, so one comparison suffices and
   the argument is evaluated only once.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9u)
103
104
105static int
106named_arg_compare (const void *p1, const void *p2)
107{
108  return strcmp (((const struct named_arg *) p1)->name,
109		 ((const struct named_arg *) p2)->name);
110}
111
/* Expands to a freshly allocated copy of the translated error message for a
   format string that uses both '%(name)' directives and unnamed ones.  */
#define INVALID_MIXES_NAMED_UNNAMED() \
  xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
114
115static void *
116format_parse (const char *format, bool translated, char **invalid_reason)
117{
118  struct spec spec;
119  struct spec *result;
120
121  spec.directives = 0;
122  spec.named_arg_count = 0;
123  spec.unnamed_arg_count = 0;
124  spec.allocated = 0;
125  spec.named = NULL;
126  spec.unnamed = NULL;
127
128  for (; *format != '\0';)
129    if (*format++ == '%')
130      {
131	/* A directive.  */
132	char *name = NULL;
133	enum format_arg_type type;
134
135	spec.directives++;
136
137	if (*format == '(')
138	  {
139	    unsigned int depth;
140	    const char *name_start;
141	    const char *name_end;
142	    size_t n;
143
144	    name_start = ++format;
145	    depth = 0;
146	    for (; *format != '\0'; format++)
147	      {
148		if (*format == '(')
149		  depth++;
150		else if (*format == ')')
151		  {
152		    if (depth == 0)
153		      break;
154		    else
155		      depth--;
156		  }
157	      }
158	    if (*format == '\0')
159	      {
160		*invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
161		goto bad_format;
162	      }
163	    name_end = format++;
164
165	    n = name_end - name_start;
166	    name = (char *) xmalloc (n + 1);
167	    memcpy (name, name_start, n);
168	    name[n] = '\0';
169	  }
170
171	while (*format == '-' || *format == '+' || *format == ' '
172	       || *format == '#' || *format == '0')
173	  format++;
174
175	if (*format == '*')
176	  {
177	    format++;
178
179	    /* Named and unnamed specifications are exclusive.  */
180	    if (spec.named_arg_count > 0)
181	      {
182		*invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
183		goto bad_format;
184	      }
185
186	    if (spec.allocated == spec.unnamed_arg_count)
187	      {
188		spec.allocated = 2 * spec.allocated + 1;
189		spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
190	      }
191	    spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
192	    spec.unnamed_arg_count++;
193	  }
194	else if (isdigit (*format))
195	  {
196	    do format++; while (isdigit (*format));
197	  }
198
199	if (*format == '.')
200	  {
201	    format++;
202
203	    if (*format == '*')
204	      {
205		format++;
206
207		/* Named and unnamed specifications are exclusive.  */
208		if (spec.named_arg_count > 0)
209		  {
210		    *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
211		    goto bad_format;
212		  }
213
214		if (spec.allocated == spec.unnamed_arg_count)
215		  {
216		    spec.allocated = 2 * spec.allocated + 1;
217		    spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
218		  }
219		spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
220		spec.unnamed_arg_count++;
221	      }
222	    else if (isdigit (*format))
223	      {
224		do format++; while (isdigit (*format));
225	      }
226	  }
227
228	if (*format == 'h' || *format == 'l' || *format == 'L')
229	  format++;
230
231	switch (*format)
232	  {
233	  case '%':
234	    type = FAT_ANY;
235	    break;
236	  case 'c':
237	    type = FAT_CHARACTER;
238	    break;
239	  case 's': case 'r':
240	    type = FAT_STRING;
241	    break;
242	  case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
243	    type = FAT_INTEGER;
244	    break;
245	  case 'e': case 'E': case 'f': case 'g': case 'G':
246	    type = FAT_FLOAT;
247	    break;
248	  default:
249	    *invalid_reason =
250	      (*format == '\0'
251	       ? INVALID_UNTERMINATED_DIRECTIVE ()
252	       : INVALID_CONVERSION_SPECIFIER (spec.directives, *format));
253	    goto bad_format;
254	  }
255
256	if (name != NULL)
257	  {
258	    /* Named argument.  */
259
260	    /* Named and unnamed specifications are exclusive.  */
261	    if (spec.unnamed_arg_count > 0)
262	      {
263		*invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
264		goto bad_format;
265	      }
266
267	    if (spec.allocated == spec.named_arg_count)
268	      {
269		spec.allocated = 2 * spec.allocated + 1;
270		spec.named = (struct named_arg *) xrealloc (spec.named, spec.allocated * sizeof (struct named_arg));
271	      }
272	    spec.named[spec.named_arg_count].name = name;
273	    spec.named[spec.named_arg_count].type = type;
274	    spec.named_arg_count++;
275	  }
276	else if (*format != '%')
277	  {
278	    /* Unnamed argument.  */
279
280	    /* Named and unnamed specifications are exclusive.  */
281	    if (spec.named_arg_count > 0)
282	      {
283		*invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
284		goto bad_format;
285	      }
286
287	    if (spec.allocated == spec.unnamed_arg_count)
288	      {
289		spec.allocated = 2 * spec.allocated + 1;
290		spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, spec.allocated * sizeof (struct unnamed_arg));
291	      }
292	    spec.unnamed[spec.unnamed_arg_count].type = type;
293	    spec.unnamed_arg_count++;
294	  }
295
296	format++;
297      }
298
299  /* Sort the named argument array, and eliminate duplicates.  */
300  if (spec.named_arg_count > 1)
301    {
302      unsigned int i, j;
303      bool err;
304
305      qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
306	     named_arg_compare);
307
308      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
309      err = false;
310      for (i = j = 0; i < spec.named_arg_count; i++)
311	if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
312	  {
313	    enum format_arg_type type1 = spec.named[i].type;
314	    enum format_arg_type type2 = spec.named[j-1].type;
315	    enum format_arg_type type_both;
316
317	    if (type1 == type2 || type2 == FAT_ANY)
318	      type_both = type1;
319	    else if (type1 == FAT_ANY)
320	      type_both = type2;
321	    else
322	      {
323		/* Incompatible types.  */
324		type_both = FAT_NONE;
325		if (!err)
326		  *invalid_reason =
327		    xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
328		err = true;
329	      }
330
331	    spec.named[j-1].type = type_both;
332	    free (spec.named[i].name);
333	  }
334	else
335	  {
336	    if (j < i)
337	      {
338		spec.named[j].name = spec.named[i].name;
339		spec.named[j].type = spec.named[i].type;
340	      }
341	    j++;
342	  }
343      spec.named_arg_count = j;
344      if (err)
345	/* *invalid_reason has already been set above.  */
346	goto bad_format;
347    }
348
349  result = (struct spec *) xmalloc (sizeof (struct spec));
350  *result = spec;
351  return result;
352
353 bad_format:
354  if (spec.named != NULL)
355    {
356      unsigned int i;
357      for (i = 0; i < spec.named_arg_count; i++)
358	free (spec.named[i].name);
359      free (spec.named);
360    }
361  if (spec.unnamed != NULL)
362    free (spec.unnamed);
363  return NULL;
364}
365
366static void
367format_free (void *descr)
368{
369  struct spec *spec = (struct spec *) descr;
370
371  if (spec->named != NULL)
372    {
373      unsigned int i;
374      for (i = 0; i < spec->named_arg_count; i++)
375	free (spec->named[i].name);
376      free (spec->named);
377    }
378  if (spec->unnamed != NULL)
379    free (spec->unnamed);
380  free (spec);
381}
382
383static int
384format_get_number_of_directives (void *descr)
385{
386  struct spec *spec = (struct spec *) descr;
387
388  return spec->directives;
389}
390
391static bool
392format_check (void *msgid_descr, void *msgstr_descr, bool equality,
393	      formatstring_error_logger_t error_logger,
394	      const char *pretty_msgstr)
395{
396  struct spec *spec1 = (struct spec *) msgid_descr;
397  struct spec *spec2 = (struct spec *) msgstr_descr;
398  bool err = false;
399
400  if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
401    {
402      if (error_logger)
403	error_logger (_("format specifications in 'msgid' expect a mapping, those in '%s' expect a tuple"),
404		      pretty_msgstr);
405      err = true;
406    }
407  else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
408    {
409      if (error_logger)
410	error_logger (_("format specifications in 'msgid' expect a tuple, those in '%s' expect a mapping"),
411		      pretty_msgstr);
412      err = true;
413    }
414  else
415    {
416      if (spec1->named_arg_count + spec2->named_arg_count > 0)
417	{
418	  unsigned int i, j;
419	  unsigned int n1 = spec1->named_arg_count;
420	  unsigned int n2 = spec2->named_arg_count;
421
422	  /* Check the argument names are the same.
423	     Both arrays are sorted.  We search for the first difference.  */
424	  for (i = 0, j = 0; i < n1 || j < n2; )
425	    {
426	      int cmp = (i >= n1 ? 1 :
427			 j >= n2 ? -1 :
428			 strcmp (spec1->named[i].name, spec2->named[j].name));
429
430	      if (cmp > 0)
431		{
432		  if (error_logger)
433		    error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in 'msgid'"),
434				  spec2->named[j].name, pretty_msgstr);
435		  err = true;
436		  break;
437		}
438	      else if (cmp < 0)
439		{
440		  if (equality)
441		    {
442		      if (error_logger)
443			error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
444				      spec1->named[i].name, pretty_msgstr);
445		      err = true;
446		      break;
447		    }
448		  else
449		    i++;
450		}
451	      else
452		j++, i++;
453	    }
454	  /* Check the argument types are the same.  */
455	  if (!err)
456	    for (i = 0, j = 0; j < n2; )
457	      {
458		if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
459		  {
460		    if (spec1->named[i].type != spec2->named[j].type)
461		      {
462			if (error_logger)
463			  error_logger (_("format specifications in 'msgid' and '%s' for argument '%s' are not the same"),
464					pretty_msgstr, spec2->named[j].name);
465			err = true;
466			break;
467		      }
468		    j++, i++;
469		  }
470		else
471		  i++;
472	      }
473	}
474
475      if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
476	{
477	  unsigned int i;
478
479	  /* Check the argument types are the same.  */
480	  if (equality
481	      ? spec1->unnamed_arg_count != spec2->unnamed_arg_count
482	      : spec1->unnamed_arg_count < spec2->unnamed_arg_count)
483	    {
484	      if (error_logger)
485		error_logger (_("number of format specifications in 'msgid' and '%s' does not match"),
486			      pretty_msgstr);
487	      err = true;
488	    }
489	  else
490	    for (i = 0; i < spec2->unnamed_arg_count; i++)
491	      if (spec1->unnamed[i].type != spec2->unnamed[i].type)
492		{
493		  if (error_logger)
494		    error_logger (_("format specifications in 'msgid' and '%s' for argument %u are not the same"),
495				  pretty_msgstr, i + 1);
496		  err = true;
497		}
498	}
499    }
500
501  return err;
502}
503
504
505struct formatstring_parser formatstring_python =
506{
507  format_parse,
508  format_free,
509  format_get_number_of_directives,
510  NULL,
511  format_check
512};
513
514
515unsigned int
516get_python_format_unnamed_arg_count (const char *string)
517{
518  /* Parse the format string.  */
519  char *invalid_reason = NULL;
520  struct spec *descr =
521    (struct spec *) format_parse (string, false, &invalid_reason);
522
523  if (descr != NULL)
524    {
525      unsigned int result = descr->unnamed_arg_count;
526
527      format_free (descr);
528      return result;
529    }
530  else
531    {
532      free (invalid_reason);
533      return 0;
534    }
535}
536
537
538#ifdef TEST
539
540/* Test program: Print the argument list specification returned by
541   format_parse for strings read from standard input.  */
542
543#include <stdio.h>
544#include "getline.h"
545
546static void
547format_print (void *descr)
548{
549  struct spec *spec = (struct spec *) descr;
550  unsigned int i;
551
552  if (spec == NULL)
553    {
554      printf ("INVALID");
555      return;
556    }
557
558  if (spec->named_arg_count > 0)
559    {
560      if (spec->unnamed_arg_count > 0)
561	abort ();
562
563      printf ("{");
564      for (i = 0; i < spec->named_arg_count; i++)
565	{
566	  if (i > 0)
567	    printf (", ");
568	  printf ("'%s':", spec->named[i].name);
569	  switch (spec->named[i].type)
570	    {
571	    case FAT_ANY:
572	      printf ("*");
573	      break;
574	    case FAT_CHARACTER:
575	      printf ("c");
576	      break;
577	    case FAT_STRING:
578	      printf ("s");
579	      break;
580	    case FAT_INTEGER:
581	      printf ("i");
582	      break;
583	    case FAT_FLOAT:
584	      printf ("f");
585	      break;
586	    default:
587	      abort ();
588	    }
589	}
590      printf ("}");
591    }
592  else
593    {
594      printf ("(");
595      for (i = 0; i < spec->unnamed_arg_count; i++)
596	{
597	  if (i > 0)
598	    printf (" ");
599	  switch (spec->unnamed[i].type)
600	    {
601	    case FAT_ANY:
602	      printf ("*");
603	      break;
604	    case FAT_CHARACTER:
605	      printf ("c");
606	      break;
607	    case FAT_STRING:
608	      printf ("s");
609	      break;
610	    case FAT_INTEGER:
611	      printf ("i");
612	      break;
613	    case FAT_FLOAT:
614	      printf ("f");
615	      break;
616	    default:
617	      abort ();
618	    }
619	}
620      printf (")");
621    }
622}
623
/* Test driver: read format strings from standard input, one per line, and
   print the parsed argument list of each, followed by the error message
   when the string is invalid.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer before failing; free (NULL)
             is harmless if it did not.  */
          free (line);
          break;
        }
      /* Strip the trailing newline.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* Release the parse result so it does not leak on every line.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
655
656/*
657 * For Emacs M-x compile
658 * Local Variables:
659 * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-python.c ../lib/libgettextlib.la"
660 * End:
661 */
662
663#endif /* TEST */
664