1/* Parse a printf-style format string.
2
3   Copyright (C) 1986-2023 Free Software Foundation, Inc.
4
5   This file is part of GDB.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19
20#include "common-defs.h"
21#include "format.h"
22
23format_pieces::format_pieces (const char **arg, bool gdb_extensions)
24{
25  const char *s;
26  const char *string;
27  const char *prev_start;
28  const char *percent_loc;
29  char *sub_start, *current_substring;
30  enum argclass this_argclass;
31
32  s = *arg;
33
34  if (gdb_extensions)
35    {
36      string = *arg;
37      *arg += strlen (*arg);
38    }
39  else
40    {
41      /* Parse the format-control string and copy it into the string STRING,
42	 processing some kinds of escape sequence.  */
43
44      char *f = (char *) alloca (strlen (s) + 1);
45      string = f;
46
47      while ((gdb_extensions || *s != '"') && *s != '\0')
48	{
49	  int c = *s++;
50	  switch (c)
51	    {
52	    case '\0':
53	      continue;
54
55	    case '\\':
56	      switch (c = *s++)
57		{
58		case '\\':
59		  *f++ = '\\';
60		  break;
61		case 'a':
62		  *f++ = '\a';
63		  break;
64		case 'b':
65		  *f++ = '\b';
66		  break;
67		case 'e':
68		  *f++ = '\e';
69		  break;
70		case 'f':
71		  *f++ = '\f';
72		  break;
73		case 'n':
74		  *f++ = '\n';
75		  break;
76		case 'r':
77		  *f++ = '\r';
78		  break;
79		case 't':
80		  *f++ = '\t';
81		  break;
82		case 'v':
83		  *f++ = '\v';
84		  break;
85		case '"':
86		  *f++ = '"';
87		  break;
88		default:
89		  /* ??? TODO: handle other escape sequences.  */
90		  error (_("Unrecognized escape character \\%c in format string."),
91			 c);
92		}
93	      break;
94
95	    default:
96	      *f++ = c;
97	    }
98	}
99
100      /* Terminate our escape-processed copy.  */
101      *f++ = '\0';
102
103      /* Whether the format string ended with double-quote or zero, we're
104	 done with it; it's up to callers to complain about syntax.  */
105      *arg = s;
106    }
107
108  /* Need extra space for the '\0's.  Doubling the size is sufficient.  */
109
110  current_substring = (char *) xmalloc (strlen (string) * 2 + 1000);
111  m_storage.reset (current_substring);
112
113  /* Now scan the string for %-specs and see what kinds of args they want.
114     argclass classifies the %-specs so we can give printf-type functions
115     something of the right size.  */
116
117  const char *f = string;
118  prev_start = string;
119  while (*f)
120    if (*f++ == '%')
121      {
122	int seen_hash = 0, seen_zero = 0, lcount = 0, seen_prec = 0;
123	int seen_space = 0, seen_plus = 0;
124	int seen_big_l = 0, seen_h = 0, seen_big_h = 0;
125	int seen_big_d = 0, seen_double_big_d = 0;
126	int seen_size_t = 0;
127	int bad = 0;
128	int n_int_args = 0;
129	bool seen_i64 = false;
130
131	/* Skip over "%%", it will become part of a literal piece.  */
132	if (*f == '%')
133	  {
134	    f++;
135	    continue;
136	  }
137
138	sub_start = current_substring;
139
140	strncpy (current_substring, prev_start, f - 1 - prev_start);
141	current_substring += f - 1 - prev_start;
142	*current_substring++ = '\0';
143
144	if (*sub_start != '\0')
145	  m_pieces.emplace_back (sub_start, literal_piece, 0);
146
147	percent_loc = f - 1;
148
149	/* Check the validity of the format specifier, and work
150	   out what argument it expects.  We only accept C89
151	   format strings, with the exception of long long (which
152	   we autoconf for).  */
153
154	/* The first part of a format specifier is a set of flag
155	   characters.  */
156	while (*f != '\0' && strchr ("0-+ #", *f))
157	  {
158	    if (*f == '#')
159	      seen_hash = 1;
160	    else if (*f == '0')
161	      seen_zero = 1;
162	    else if (*f == ' ')
163	      seen_space = 1;
164	    else if (*f == '+')
165	      seen_plus = 1;
166	    f++;
167	  }
168
169	/* The next part of a format specifier is a width.  */
170	if (gdb_extensions && *f == '*')
171	  {
172	    ++f;
173	    ++n_int_args;
174	  }
175	else
176	  {
177	    while (*f != '\0' && strchr ("0123456789", *f))
178	      f++;
179	  }
180
181	/* The next part of a format specifier is a precision.  */
182	if (*f == '.')
183	  {
184	    seen_prec = 1;
185	    f++;
186	    if (gdb_extensions && *f == '*')
187	      {
188		++f;
189		++n_int_args;
190	      }
191	    else
192	      {
193		while (*f != '\0' && strchr ("0123456789", *f))
194		  f++;
195	      }
196	  }
197
198	/* The next part of a format specifier is a length modifier.  */
199	switch (*f)
200	  {
201	  case 'h':
202	    seen_h = 1;
203	    f++;
204	    break;
205	  case 'l':
206	    f++;
207	    lcount++;
208	    if (*f == 'l')
209	      {
210		f++;
211		lcount++;
212	      }
213	    break;
214	  case 'L':
215	    seen_big_l = 1;
216	    f++;
217	    break;
218	  case 'H':
219	    /* Decimal32 modifier.  */
220	    seen_big_h = 1;
221	    f++;
222	    break;
223	  case 'D':
224	    /* Decimal64 and Decimal128 modifiers.  */
225	    f++;
226
227	    /* Check for a Decimal128.  */
228	    if (*f == 'D')
229	      {
230		f++;
231		seen_double_big_d = 1;
232	      }
233	    else
234	      seen_big_d = 1;
235	    break;
236	  case 'z':
237	    /* For size_t or ssize_t.  */
238	    seen_size_t = 1;
239	    f++;
240	    break;
241	  case 'I':
242	    /* Support the Windows '%I64' extension, because an
243	       earlier call to format_pieces might have converted %lld
244	       to %I64d.  */
245	    if (f[1] == '6' && f[2] == '4')
246	      {
247		f += 3;
248		lcount = 2;
249		seen_i64 = true;
250	      }
251	    break;
252	}
253
254	switch (*f)
255	  {
256	  case 'u':
257	    if (seen_hash)
258	      bad = 1;
259	    /* FALLTHROUGH */
260
261	  case 'o':
262	  case 'x':
263	  case 'X':
264	    if (seen_space || seen_plus)
265	      bad = 1;
266	  /* FALLTHROUGH */
267
268	  case 'd':
269	  case 'i':
270	    if (seen_size_t)
271	      this_argclass = size_t_arg;
272	    else if (lcount == 0)
273	      this_argclass = int_arg;
274	    else if (lcount == 1)
275	      this_argclass = long_arg;
276	    else
277	      this_argclass = long_long_arg;
278
279	    if (seen_big_l)
280	      bad = 1;
281	    break;
282
283	  case 'c':
284	    this_argclass = lcount == 0 ? int_arg : wide_char_arg;
285	    if (lcount > 1 || seen_h || seen_big_l)
286	      bad = 1;
287	    if (seen_prec || seen_zero || seen_space || seen_plus)
288	      bad = 1;
289	    break;
290
291	  case 'p':
292	    this_argclass = ptr_arg;
293	    if (lcount || seen_h || seen_big_l)
294	      bad = 1;
295	    if (seen_prec)
296	      bad = 1;
297	    if (seen_hash || seen_zero || seen_space || seen_plus)
298	      bad = 1;
299
300	    if (gdb_extensions)
301	      {
302		switch (f[1])
303		  {
304		  case 's':
305		  case 'F':
306		  case '[':
307		  case ']':
308		    f++;
309		    break;
310		  }
311	      }
312
313	    break;
314
315	  case 's':
316	    this_argclass = lcount == 0 ? string_arg : wide_string_arg;
317	    if (lcount > 1 || seen_h || seen_big_l)
318	      bad = 1;
319	    if (seen_zero || seen_space || seen_plus)
320	      bad = 1;
321	    break;
322
323	  case 'e':
324	  case 'f':
325	  case 'g':
326	  case 'E':
327	  case 'G':
328	    if (seen_double_big_d)
329	      this_argclass = dec128float_arg;
330	    else if (seen_big_d)
331	      this_argclass = dec64float_arg;
332	    else if (seen_big_h)
333	      this_argclass = dec32float_arg;
334	    else if (seen_big_l)
335	      this_argclass = long_double_arg;
336	    else
337	      this_argclass = double_arg;
338
339	    if (lcount || seen_h)
340	      bad = 1;
341	    break;
342
343	  case '*':
344	    error (_("`*' not supported for precision or width in printf"));
345
346	  case 'n':
347	    error (_("Format specifier `n' not supported in printf"));
348
349	  case '\0':
350	    error (_("Incomplete format specifier at end of format string"));
351
352	  default:
353	    error (_("Unrecognized format specifier '%c' in printf"), *f);
354	  }
355
356	if (bad)
357	  error (_("Inappropriate modifiers to "
358		   "format specifier '%c' in printf"),
359		 *f);
360
361	f++;
362
363	sub_start = current_substring;
364
365	if (lcount > 1 && !seen_i64 && USE_PRINTF_I64)
366	  {
367	    /* Windows' printf does support long long, but not the usual way.
368	       Convert %lld to %I64d.  */
369	    int length_before_ll = f - percent_loc - 1 - lcount;
370
371	    strncpy (current_substring, percent_loc, length_before_ll);
372	    strcpy (current_substring + length_before_ll, "I64");
373	    current_substring[length_before_ll + 3] =
374	      percent_loc[length_before_ll + lcount];
375	    current_substring += length_before_ll + 4;
376	  }
377	else if (this_argclass == wide_string_arg
378		 || this_argclass == wide_char_arg)
379	  {
380	    /* Convert %ls or %lc to %s.  */
381	    int length_before_ls = f - percent_loc - 2;
382
383	    strncpy (current_substring, percent_loc, length_before_ls);
384	    strcpy (current_substring + length_before_ls, "s");
385	    current_substring += length_before_ls + 2;
386	  }
387	else
388	  {
389	    strncpy (current_substring, percent_loc, f - percent_loc);
390	    current_substring += f - percent_loc;
391	  }
392
393	*current_substring++ = '\0';
394
395	prev_start = f;
396
397	m_pieces.emplace_back (sub_start, this_argclass, n_int_args);
398      }
399
400  /* Record the remainder of the string.  */
401
402  if (f > prev_start)
403    {
404      sub_start = current_substring;
405
406      strncpy (current_substring, prev_start, f - prev_start);
407      current_substring += f - prev_start;
408      *current_substring++ = '\0';
409
410      m_pieces.emplace_back (sub_start, literal_piece, 0);
411    }
412}
413