1/* Copyright (C) 2002-2022 Free Software Foundation, Inc.
2   Contributed by Andy Vaught
3   F2003 I/O support contributed by Jerry DeLisle
4
5This file is part of the GNU Fortran runtime library (libgfortran).
6
7Libgfortran is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12Libgfortran is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
26
27/* format.c-- parse a FORMAT string into a binary format suitable for
28   interpretation during I/O statements.  */
29
30#include "io.h"
31#include "format.h"
32#include <string.h>
33
34
35static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
36				  NULL };
37
38/* Error messages. */
39
40static const char posint_required[] = "Positive integer required in format",
41  period_required[] = "Period required in format",
42  nonneg_required[] = "Nonnegative width required in format",
43  unexpected_element[] = "Unexpected element '%c' in format\n",
44  unexpected_end[] = "Unexpected end of format string",
45  bad_string[] = "Unterminated character constant in format",
46  bad_hollerith[] = "Hollerith constant extends past the end of the format",
47  reversion_error[] = "Exhausted data descriptors in format",
48  zero_width[] = "Zero width in format descriptor";
49
50/* The following routines support caching format data from parsed format strings
51   into a hash table.  This avoids repeatedly parsing duplicate format strings
52   or format strings in I/O statements that are repeated in loops.  */
53
54
55/* Traverse the table and free all data.  */
56
57void
58free_format_hash_table (gfc_unit *u)
59{
60  size_t i;
61
62  /* free_format_data handles any NULL pointers.  */
63  for (i = 0; i < FORMAT_HASH_SIZE; i++)
64    {
65      if (u->format_hash_table[i].hashed_fmt != NULL)
66	{
67	  free_format_data (u->format_hash_table[i].hashed_fmt);
68	  free (u->format_hash_table[i].key);
69	}
70      u->format_hash_table[i].key = NULL;
71      u->format_hash_table[i].key_len = 0;
72      u->format_hash_table[i].hashed_fmt = NULL;
73    }
74}
75
76/* Traverse the format_data structure and reset the fnode counters.  */
77
78static void
79reset_node (fnode *fn)
80{
81  fnode *f;
82
83  fn->count = 0;
84  fn->current = NULL;
85
86  if (fn->format != FMT_LPAREN)
87    return;
88
89  for (f = fn->u.child; f; f = f->next)
90    {
91      if (f->format == FMT_RPAREN)
92	break;
93      reset_node (f);
94    }
95}
96
97static void
98reset_fnode_counters (st_parameter_dt *dtp)
99{
100  fnode *f;
101  format_data *fmt;
102
103  fmt = dtp->u.p.fmt;
104
105  /* Clear this pointer at the head so things start at the right place.  */
106  fmt->array.array[0].current = NULL;
107
108  for (f = fmt->array.array[0].u.child; f; f = f->next)
109    reset_node (f);
110}
111
112
113/* A simple hashing function to generate an index into the hash table.  */
114
115static uint32_t
116format_hash (st_parameter_dt *dtp)
117{
118  char *key;
119  gfc_charlen_type key_len;
120  uint32_t hash = 0;
121  gfc_charlen_type i;
122
123  /* Hash the format string. Super simple, but what the heck!  */
124  key = dtp->format;
125  key_len = dtp->format_len;
126  for (i = 0; i < key_len; i++)
127    hash ^= key[i];
128  hash &= (FORMAT_HASH_SIZE - 1);
129  return hash;
130}
131
132
133static void
134save_parsed_format (st_parameter_dt *dtp)
135{
136  uint32_t hash;
137  gfc_unit *u;
138
139  hash = format_hash (dtp);
140  u = dtp->u.p.current_unit;
141
142  /* Index into the hash table.  We are simply replacing whatever is there
143     relying on probability.  */
144  if (u->format_hash_table[hash].hashed_fmt != NULL)
145    free_format_data (u->format_hash_table[hash].hashed_fmt);
146  u->format_hash_table[hash].hashed_fmt = NULL;
147
148  free (u->format_hash_table[hash].key);
149  u->format_hash_table[hash].key = dtp->format;
150
151  u->format_hash_table[hash].key_len = dtp->format_len;
152  u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
153}
154
155
156static format_data *
157find_parsed_format (st_parameter_dt *dtp)
158{
159  uint32_t hash;
160  gfc_unit *u;
161
162  hash = format_hash (dtp);
163  u = dtp->u.p.current_unit;
164
165  if (u->format_hash_table[hash].key != NULL)
166    {
167      /* See if it matches.  */
168      if (u->format_hash_table[hash].key_len == dtp->format_len)
169	{
170	  /* So far so good.  */
171	  if (strncmp (u->format_hash_table[hash].key,
172	      dtp->format, dtp->format_len) == 0)
173	    return u->format_hash_table[hash].hashed_fmt;
174	}
175    }
176  return NULL;
177}
178
179
180/* next_char()-- Return the next character in the format string.
181   Returns -1 when the string is done.  If the literal flag is set,
182   spaces are significant, otherwise they are not. */
183
184static int
185next_char (format_data *fmt, int literal)
186{
187  int c;
188
189  do
190    {
191      if (fmt->format_string_len == 0)
192	return -1;
193
194      fmt->format_string_len--;
195      c = safe_toupper (*fmt->format_string++);
196      fmt->error_element = c;
197    }
198  while ((c == ' ' || c == '\t') && !literal);
199
200  return c;
201}
202
203
204/* unget_char()-- Back up one character position. */
205
206#define unget_char(fmt) \
207  { fmt->format_string--; fmt->format_string_len++; }
208
209
210/* get_fnode()-- Allocate a new format node, inserting it into the
211   current singly linked list.  These are initially allocated from the
212   static buffer. */
213
214static fnode *
215get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
216{
217  fnode *f;
218
219  if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
220    {
221      fmt->last->next = xmalloc (sizeof (fnode_array));
222      fmt->last = fmt->last->next;
223      fmt->last->next = NULL;
224      fmt->avail = &fmt->last->array[0];
225    }
226  f = fmt->avail++;
227  memset (f, '\0', sizeof (fnode));
228
229  if (*head == NULL)
230    *head = *tail = f;
231  else
232    {
233      (*tail)->next = f;
234      *tail = f;
235    }
236
237  f->format = t;
238  f->repeat = -1;
239  f->source = fmt->format_string;
240  return f;
241}
242
243
244/* free_format()-- Free allocated format string.  */
245void
246free_format (st_parameter_dt *dtp)
247{
248  if ((dtp->common.flags & IOPARM_DT_HAS_FORMAT) && dtp->format)
249    {
250      free (dtp->format);
251      dtp->format = NULL;
252    }
253}
254
255
256/* free_format_data()-- Free all allocated format data.  */
257
258void
259free_format_data (format_data *fmt)
260{
261  fnode_array *fa, *fa_next;
262  fnode *fnp;
263
264  if (fmt == NULL)
265    return;
266
267  /* Free vlist descriptors in the fnode_array if one was allocated.  */
268  for (fnp = fmt->array.array; fnp < &fmt->array.array[FARRAY_SIZE] &&
269       fnp->format != FMT_NONE; fnp++)
270    if (fnp->format == FMT_DT)
271	{
272	  if (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist))
273	    free (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist));
274	  free (fnp->u.udf.vlist);
275	}
276
277  for (fa = fmt->array.next; fa; fa = fa_next)
278    {
279      fa_next = fa->next;
280      free (fa);
281    }
282
283  free (fmt);
284  fmt = NULL;
285}
286
287
288/* format_lex()-- Simple lexical analyzer for getting the next token
289   in a FORMAT string.  We support a one-level token pushback in the
290   fmt->saved_token variable. */
291
292static format_token
293format_lex (format_data *fmt)
294{
295  format_token token;
296  int negative_flag;
297  int c;
298  char delim;
299
300  if (fmt->saved_token != FMT_NONE)
301    {
302      token = fmt->saved_token;
303      fmt->saved_token = FMT_NONE;
304      return token;
305    }
306
307  negative_flag = 0;
308  c = next_char (fmt, 0);
309
310  switch (c)
311    {
312    case '*':
313       token = FMT_STAR;
314       break;
315
316    case '(':
317      token = FMT_LPAREN;
318      break;
319
320    case ')':
321      token = FMT_RPAREN;
322      break;
323
324    case '-':
325      negative_flag = 1;
326      /* Fall Through */
327
328    case '+':
329      c = next_char (fmt, 0);
330      if (!safe_isdigit (c))
331	{
332	  token = FMT_UNKNOWN;
333	  break;
334	}
335
336      fmt->value = c - '0';
337
338      for (;;)
339	{
340	  c = next_char (fmt, 0);
341	  if (!safe_isdigit (c))
342	    break;
343
344	  fmt->value = 10 * fmt->value + c - '0';
345	}
346
347      unget_char (fmt);
348
349      if (negative_flag)
350	fmt->value = -fmt->value;
351      token = FMT_SIGNED_INT;
352      break;
353
354    case '0':
355    case '1':
356    case '2':
357    case '3':
358    case '4':
359    case '5':
360    case '6':
361    case '7':
362    case '8':
363    case '9':
364      fmt->value = c - '0';
365
366      for (;;)
367	{
368	  c = next_char (fmt, 0);
369	  if (!safe_isdigit (c))
370	    break;
371
372	  fmt->value = 10 * fmt->value + c - '0';
373	}
374
375      unget_char (fmt);
376      token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
377      break;
378
379    case '.':
380      token = FMT_PERIOD;
381      break;
382
383    case ',':
384      token = FMT_COMMA;
385      break;
386
387    case ':':
388      token = FMT_COLON;
389      break;
390
391    case '/':
392      token = FMT_SLASH;
393      break;
394
395    case '$':
396      token = FMT_DOLLAR;
397      break;
398
399    case 'T':
400      switch (next_char (fmt, 0))
401	{
402	case 'L':
403	  token = FMT_TL;
404	  break;
405	case 'R':
406	  token = FMT_TR;
407	  break;
408	default:
409	  token = FMT_T;
410	  unget_char (fmt);
411	  break;
412	}
413
414      break;
415
416    case 'X':
417      token = FMT_X;
418      break;
419
420    case 'S':
421      switch (next_char (fmt, 0))
422	{
423	case 'S':
424	  token = FMT_SS;
425	  break;
426	case 'P':
427	  token = FMT_SP;
428	  break;
429	default:
430	  token = FMT_S;
431	  unget_char (fmt);
432	  break;
433	}
434
435      break;
436
437    case 'B':
438      switch (next_char (fmt, 0))
439	{
440	case 'N':
441	  token = FMT_BN;
442	  break;
443	case 'Z':
444	  token = FMT_BZ;
445	  break;
446	default:
447	  token = FMT_B;
448	  unget_char (fmt);
449	  break;
450	}
451
452      break;
453
454    case '\'':
455    case '"':
456      delim = c;
457
458      fmt->string = fmt->format_string;
459      fmt->value = 0;		/* This is the length of the string */
460
461      for (;;)
462	{
463	  c = next_char (fmt, 1);
464	  if (c == -1)
465	    {
466	      token = FMT_BADSTRING;
467	      fmt->error = bad_string;
468	      break;
469	    }
470
471	  if (c == delim)
472	    {
473	      c = next_char (fmt, 1);
474
475	      if (c == -1)
476		{
477		  token = FMT_BADSTRING;
478		  fmt->error = bad_string;
479		  break;
480		}
481
482	      if (c != delim)
483		{
484		  unget_char (fmt);
485		  token = FMT_STRING;
486		  break;
487		}
488	    }
489
490	  fmt->value++;
491	}
492
493      break;
494
495    case 'P':
496      token = FMT_P;
497      break;
498
499    case 'I':
500      token = FMT_I;
501      break;
502
503    case 'O':
504      token = FMT_O;
505      break;
506
507    case 'Z':
508      token = FMT_Z;
509      break;
510
511    case 'F':
512      token = FMT_F;
513      break;
514
515    case 'E':
516      switch (next_char (fmt, 0))
517	{
518	case 'N':
519	  token = FMT_EN;
520	  break;
521	case 'S':
522	  token = FMT_ES;
523	  break;
524	default:
525	  token = FMT_E;
526	  unget_char (fmt);
527	  break;
528	}
529      break;
530
531    case 'G':
532      token = FMT_G;
533      break;
534
535    case 'H':
536      token = FMT_H;
537      break;
538
539    case 'L':
540      token = FMT_L;
541      break;
542
543    case 'A':
544      token = FMT_A;
545      break;
546
547    case 'D':
548      switch (next_char (fmt, 0))
549	{
550	case 'P':
551	  token = FMT_DP;
552	  break;
553	case 'C':
554	  token = FMT_DC;
555	  break;
556	case 'T':
557	  token = FMT_DT;
558	  break;
559	default:
560	  token = FMT_D;
561	  unget_char (fmt);
562	  break;
563	}
564      break;
565
566    case 'R':
567      switch (next_char (fmt, 0))
568	{
569	case 'C':
570	  token = FMT_RC;
571	  break;
572	case 'D':
573	  token = FMT_RD;
574	  break;
575	case 'N':
576	  token = FMT_RN;
577	  break;
578	case 'P':
579	  token = FMT_RP;
580	  break;
581	case 'U':
582	  token = FMT_RU;
583	  break;
584	case 'Z':
585	  token = FMT_RZ;
586	  break;
587	default:
588	  unget_char (fmt);
589	  token = FMT_UNKNOWN;
590	  break;
591	}
592      break;
593
594    case -1:
595      token = FMT_END;
596      break;
597
598    default:
599      token = FMT_UNKNOWN;
600      break;
601    }
602
603  return token;
604}
605
606
607/* parse_format_list()-- Parse a format list.  Assumes that a left
608   paren has already been seen.  Returns a list representing the
609   parenthesis node which contains the rest of the list. */
610
611static fnode *
612parse_format_list (st_parameter_dt *dtp, bool *seen_dd)
613{
614  fnode *head, *tail;
615  format_token t, u, t2;
616  int repeat;
617  format_data *fmt = dtp->u.p.fmt;
618  bool seen_data_desc = false;
619  int standard;
620
621  head = tail = NULL;
622
623  /* Get the next format item */
624 format_item:
625  t = format_lex (fmt);
626 format_item_1:
627  switch (t)
628    {
629    case FMT_STAR:
630      t = format_lex (fmt);
631      if (t != FMT_LPAREN)
632	{
633	  fmt->error = "Left parenthesis required after '*'";
634	  goto finished;
635	}
636      get_fnode (fmt, &head, &tail, FMT_LPAREN);
637      tail->repeat = -2;  /* Signifies unlimited format.  */
638      tail->u.child = parse_format_list (dtp, &seen_data_desc);
639      *seen_dd = seen_data_desc;
640      if (fmt->error != NULL)
641	goto finished;
642      if (!seen_data_desc)
643	{
644	  fmt->error = "'*' requires at least one associated data descriptor";
645	  goto finished;
646	}
647      goto between_desc;
648
649    case FMT_POSINT:
650      repeat = fmt->value;
651
652      t = format_lex (fmt);
653      switch (t)
654	{
655	case FMT_LPAREN:
656	  get_fnode (fmt, &head, &tail, FMT_LPAREN);
657	  tail->repeat = repeat;
658	  tail->u.child = parse_format_list (dtp, &seen_data_desc);
659	  *seen_dd = seen_data_desc;
660	  if (fmt->error != NULL)
661	    goto finished;
662
663	  goto between_desc;
664
665	case FMT_SLASH:
666	  get_fnode (fmt, &head, &tail, FMT_SLASH);
667	  tail->repeat = repeat;
668	  goto optional_comma;
669
670	case FMT_X:
671	  get_fnode (fmt, &head, &tail, FMT_X);
672	  tail->repeat = 1;
673	  tail->u.k = fmt->value;
674	  goto between_desc;
675
676	case FMT_P:
677	  goto p_descriptor;
678
679	default:
680	  goto data_desc;
681	}
682
683    case FMT_LPAREN:
684      get_fnode (fmt, &head, &tail, FMT_LPAREN);
685      tail->repeat = 1;
686      tail->u.child = parse_format_list (dtp, &seen_data_desc);
687      *seen_dd = seen_data_desc;
688      if (fmt->error != NULL)
689	goto finished;
690
691      goto between_desc;
692
693    case FMT_SIGNED_INT:	/* Signed integer can only precede a P format.  */
694    case FMT_ZERO:		/* Same for zero.  */
695      t = format_lex (fmt);
696      if (t != FMT_P)
697	{
698	  fmt->error = "Expected P edit descriptor in format";
699	  goto finished;
700	}
701
702    p_descriptor:
703      get_fnode (fmt, &head, &tail, FMT_P);
704      tail->u.k = fmt->value;
705      tail->repeat = 1;
706
707      t = format_lex (fmt);
708      if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
709	  || t == FMT_G || t == FMT_E)
710	{
711	  repeat = 1;
712	  goto data_desc;
713	}
714
715      if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH
716	  && t != FMT_POSINT)
717	{
718	  fmt->error = "Comma required after P descriptor";
719	  goto finished;
720	}
721
722      fmt->saved_token = t;
723      goto optional_comma;
724
725    case FMT_P:		/* P and X require a prior number */
726      fmt->error = "P descriptor requires leading scale factor";
727      goto finished;
728
729    case FMT_X:
730/*
731   EXTENSION!
732
733   If we would be pedantic in the library, we would have to reject
734   an X descriptor without an integer prefix:
735
736      fmt->error = "X descriptor requires leading space count";
737      goto finished;
738
739   However, this is an extension supported by many Fortran compilers,
740   including Cray, HP, AIX, and IRIX.  Therefore, we allow it in the
741   runtime library, and make the front end reject it if the compiler
742   is in pedantic mode.  The interpretation of 'X' is '1X'.
743*/
744      get_fnode (fmt, &head, &tail, FMT_X);
745      tail->repeat = 1;
746      tail->u.k = 1;
747      goto between_desc;
748
749    case FMT_STRING:
750      get_fnode (fmt, &head, &tail, FMT_STRING);
751      tail->u.string.p = fmt->string;
752      tail->u.string.length = fmt->value;
753      tail->repeat = 1;
754      goto optional_comma;
755
756    case FMT_RC:
757    case FMT_RD:
758    case FMT_RN:
759    case FMT_RP:
760    case FMT_RU:
761    case FMT_RZ:
762      notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
763		  "descriptor not allowed");
764      get_fnode (fmt, &head, &tail, t);
765      tail->repeat = 1;
766      goto between_desc;
767
768    case FMT_DC:
769    case FMT_DP:
770      notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
771		  "descriptor not allowed");
772    /* Fall through.  */
773    case FMT_S:
774    case FMT_SS:
775    case FMT_SP:
776    case FMT_BN:
777    case FMT_BZ:
778      get_fnode (fmt, &head, &tail, t);
779      tail->repeat = 1;
780      goto between_desc;
781
782    case FMT_COLON:
783      get_fnode (fmt, &head, &tail, FMT_COLON);
784      tail->repeat = 1;
785      goto optional_comma;
786
787    case FMT_SLASH:
788      get_fnode (fmt, &head, &tail, FMT_SLASH);
789      tail->repeat = 1;
790      tail->u.r = 1;
791      goto optional_comma;
792
793    case FMT_DOLLAR:
794      get_fnode (fmt, &head, &tail, FMT_DOLLAR);
795      tail->repeat = 1;
796      notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
797      goto between_desc;
798
799    case FMT_T:
800    case FMT_TL:
801    case FMT_TR:
802      t2 = format_lex (fmt);
803      if (t2 != FMT_POSINT)
804	{
805	  fmt->error = posint_required;
806	  goto finished;
807	}
808      get_fnode (fmt, &head, &tail, t);
809      tail->u.n = fmt->value;
810      tail->repeat = 1;
811      goto between_desc;
812
813    case FMT_I:
814    case FMT_B:
815    case FMT_O:
816    case FMT_Z:
817    case FMT_E:
818    case FMT_EN:
819    case FMT_ES:
820    case FMT_D:
821    case FMT_DT:
822    case FMT_L:
823    case FMT_A:
824    case FMT_F:
825    case FMT_G:
826      repeat = 1;
827      *seen_dd = true;
828      goto data_desc;
829
830    case FMT_H:
831      get_fnode (fmt, &head, &tail, FMT_STRING);
832      if (fmt->format_string_len < 1)
833	{
834	  fmt->error = bad_hollerith;
835	  goto finished;
836	}
837
838      tail->u.string.p = fmt->format_string;
839      tail->u.string.length = 1;
840      tail->repeat = 1;
841
842      fmt->format_string++;
843      fmt->format_string_len--;
844
845      goto between_desc;
846
847    case FMT_END:
848      fmt->error = unexpected_end;
849      goto finished;
850
851    case FMT_BADSTRING:
852      goto finished;
853
854    case FMT_RPAREN:
855      goto finished;
856
857    default:
858      fmt->error = unexpected_element;
859      goto finished;
860    }
861
862  /* In this state, t must currently be a data descriptor.  Deal with
863     things that can/must follow the descriptor */
864 data_desc:
865
866  switch (t)
867    {
868    case FMT_L:
869      *seen_dd = true;
870      t = format_lex (fmt);
871      if (t != FMT_POSINT)
872	{
873	  if (t == FMT_ZERO)
874	    {
875	      if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR)
876		{
877		  fmt->error = "Extension: Zero width after L descriptor";
878		  goto finished;
879		}
880	      else
881		notify_std (&dtp->common, GFC_STD_GNU,
882			    "Zero width after L descriptor");
883	    }
884	  else
885	    {
886	      fmt->saved_token = t;
887	      notify_std (&dtp->common, GFC_STD_GNU,
888			  "Positive width required with L descriptor");
889	    }
890	  fmt->value = 1;	/* Default width */
891	}
892      get_fnode (fmt, &head, &tail, FMT_L);
893      tail->u.n = fmt->value;
894      tail->repeat = repeat;
895      break;
896
897    case FMT_A:
898      *seen_dd = true;
899      t = format_lex (fmt);
900      if (t == FMT_ZERO)
901	{
902	  fmt->error = zero_width;
903	  goto finished;
904	}
905
906      if (t != FMT_POSINT)
907	{
908	  fmt->saved_token = t;
909	  fmt->value = -1;		/* Width not present */
910	}
911
912      get_fnode (fmt, &head, &tail, FMT_A);
913      tail->repeat = repeat;
914      tail->u.n = fmt->value;
915      break;
916
917    case FMT_D:
918    case FMT_E:
919    case FMT_F:
920    case FMT_G:
921    case FMT_EN:
922    case FMT_ES:
923      *seen_dd = true;
924      get_fnode (fmt, &head, &tail, t);
925      tail->repeat = repeat;
926
927      u = format_lex (fmt);
928
929      /* Processing for zero width formats.  */
930      if (u == FMT_ZERO)
931	{
932          if (t == FMT_F)
933	    standard = GFC_STD_F95;
934	  else if (t == FMT_G)
935	    standard = GFC_STD_F2008;
936	  else
937	    standard = GFC_STD_F2018;
938
939	  if (notification_std (standard) == NOTIFICATION_ERROR
940	      || dtp->u.p.mode == READING)
941	    {
942	      fmt->error = zero_width;
943	      goto finished;
944	    }
945	  tail->u.real.w = 0;
946
947	  /* Look for the dot seperator.  */
948	  u = format_lex (fmt);
949	  if (u != FMT_PERIOD)
950	    {
951	      fmt->saved_token = u;
952	      break;
953	    }
954
955	  /* Look for the precision.  */
956	  u = format_lex (fmt);
957	  if (u != FMT_ZERO && u != FMT_POSINT)
958	    {
959	      fmt->error = nonneg_required;
960	      goto finished;
961	    }
962	  tail->u.real.d = fmt->value;
963
964	  /* Look for optional exponent, not allowed for FMT_D */
965	  if (t == FMT_D)
966	    break;
967	  u = format_lex (fmt);
968	  if (u != FMT_E)
969	    fmt->saved_token = u;
970	  else
971	    {
972	      u = format_lex (fmt);
973	      if (u != FMT_POSINT)
974		{
975		  if (u == FMT_ZERO)
976		    {
977		      notify_std (&dtp->common, GFC_STD_F2018,
978				  "Positive exponent width required");
979		    }
980		  else
981		    {
982		      fmt->error = "Positive exponent width required in "
983				   "format string at %L";
984		      goto finished;
985		    }
986		}
987	      tail->u.real.e = fmt->value;
988	    }
989	  break;
990	}
991
992      /* Processing for positive width formats.  */
993      if (u == FMT_POSINT)
994	{
995	  tail->u.real.w = fmt->value;
996
997	  /* Look for the dot separator. Because of legacy behaviors
998	     we do some look ahead for missing things.  */
999	  t2 = t;
1000	  t = format_lex (fmt);
1001	  if (t != FMT_PERIOD)
1002	    {
1003	      /* We treat a missing decimal descriptor as 0.  Note: This is only
1004		 allowed if -std=legacy, otherwise an error occurs.  */
1005	      if (compile_options.warn_std != 0)
1006		{
1007		  fmt->error = period_required;
1008		  goto finished;
1009		}
1010	      fmt->saved_token = t;
1011	      tail->u.real.d = 0;
1012	      tail->u.real.e = -1;
1013	      break;
1014	    }
1015
1016	  /* If we made it here, we should have the dot so look for the
1017	     precision.  */
1018	  t = format_lex (fmt);
1019	  if (t != FMT_ZERO && t != FMT_POSINT)
1020	    {
1021	      fmt->error = nonneg_required;
1022	      goto finished;
1023	    }
1024	  tail->u.real.d = fmt->value;
1025	  tail->u.real.e = -1;
1026
1027	  /* Done with D and F formats.  */
1028	  if (t2 == FMT_D || t2 == FMT_F)
1029	    {
1030	      *seen_dd = true;
1031	      break;
1032	    }
1033
1034	  /* Look for optional exponent */
1035	  u = format_lex (fmt);
1036	  if (u != FMT_E)
1037	    fmt->saved_token = u;
1038	  else
1039	    {
1040	      u = format_lex (fmt);
1041	      if (u != FMT_POSINT)
1042		{
1043		  if (u == FMT_ZERO)
1044		    {
1045		      notify_std (&dtp->common, GFC_STD_F2018,
1046				  "Positive exponent width required");
1047		    }
1048		  else
1049		    {
1050		      fmt->error = "Positive exponent width required in "
1051				   "format string at %L";
1052		      goto finished;
1053		    }
1054		}
1055	      tail->u.real.e = fmt->value;
1056	    }
1057	  break;
1058	}
1059
1060      /* Old DEC codes may not have width or precision specified.  */
1061      if (dtp->u.p.mode == WRITING && (dtp->common.flags & IOPARM_DT_DEC_EXT))
1062	{
1063	  tail->u.real.w = DEFAULT_WIDTH;
1064	  tail->u.real.d = 0;
1065	  tail->u.real.e = -1;
1066	  fmt->saved_token = u;
1067	}
1068      break;
1069
1070    case FMT_DT:
1071      *seen_dd = true;
1072      get_fnode (fmt, &head, &tail, t);
1073      tail->repeat = repeat;
1074
1075      t = format_lex (fmt);
1076
1077      /* Initialize the vlist to a zero size, rank-one array.  */
1078      tail->u.udf.vlist= xmalloc (sizeof(gfc_array_i4)
1079				  + sizeof (descriptor_dimension));
1080      GFC_DESCRIPTOR_DATA(tail->u.udf.vlist) = NULL;
1081      GFC_DIMENSION_SET(tail->u.udf.vlist->dim[0],1, 0, 0);
1082
1083      if (t == FMT_STRING)
1084        {
1085	  /* Get pointer to the optional format string.  */
1086	  tail->u.udf.string = fmt->string;
1087	  tail->u.udf.string_len = fmt->value;
1088	  t = format_lex (fmt);
1089	}
1090      if (t == FMT_LPAREN)
1091        {
1092	  /* Temporary buffer to hold the vlist values.  */
1093	  GFC_INTEGER_4 temp[FARRAY_SIZE];
1094	  int i = 0;
1095	loop:
1096	  t = format_lex (fmt);
1097	  if (t != FMT_POSINT)
1098	    {
1099	      fmt->error = posint_required;
1100	      goto finished;
1101	    }
1102	  /* Save the positive integer value.  */
1103	  temp[i++] = fmt->value;
1104	  t = format_lex (fmt);
1105	  if (t == FMT_COMMA)
1106	    goto loop;
1107	  if (t == FMT_RPAREN)
1108	    {
1109	      /* We have parsed the complete vlist so initialize the
1110	         array descriptor and save it in the format node.  */
1111	      gfc_full_array_i4 *vp = tail->u.udf.vlist;
1112	      GFC_DESCRIPTOR_DATA(vp) = xmalloc (i * sizeof(GFC_INTEGER_4));
1113	      GFC_DIMENSION_SET(vp->dim[0],1, i, 1);
1114	      memcpy (GFC_DESCRIPTOR_DATA(vp), temp, i * sizeof(GFC_INTEGER_4));
1115	      break;
1116	    }
1117	  fmt->error = unexpected_element;
1118	  goto finished;
1119	}
1120      fmt->saved_token = t;
1121      break;
1122    case FMT_H:
1123      if (repeat > fmt->format_string_len)
1124	{
1125	  fmt->error = bad_hollerith;
1126	  goto finished;
1127	}
1128
1129      get_fnode (fmt, &head, &tail, FMT_STRING);
1130      tail->u.string.p = fmt->format_string;
1131      tail->u.string.length = repeat;
1132      tail->repeat = 1;
1133
1134      fmt->format_string += fmt->value;
1135      fmt->format_string_len -= repeat;
1136
1137      break;
1138
1139    case FMT_I:
1140    case FMT_B:
1141    case FMT_O:
1142    case FMT_Z:
1143      *seen_dd = true;
1144      get_fnode (fmt, &head, &tail, t);
1145      tail->repeat = repeat;
1146
1147      t = format_lex (fmt);
1148
1149      if (dtp->u.p.mode == READING)
1150	{
1151	  if (t != FMT_POSINT)
1152	    {
1153	      if (dtp->common.flags & IOPARM_DT_DEC_EXT)
1154		{
1155		  tail->u.integer.w = DEFAULT_WIDTH;
1156		  tail->u.integer.m = -1;
1157		  fmt->saved_token = t;
1158		  break;
1159		}
1160	      fmt->error = posint_required;
1161	      goto finished;
1162	    }
1163	}
1164      else
1165	{
1166	  if (t != FMT_ZERO && t != FMT_POSINT)
1167	    {
1168	      if (dtp->common.flags & IOPARM_DT_DEC_EXT)
1169		{
1170		  tail->u.integer.w = DEFAULT_WIDTH;
1171		  tail->u.integer.m = -1;
1172		  fmt->saved_token = t;
1173		  break;
1174		}
1175	      fmt->error = nonneg_required;
1176	      goto finished;
1177	    }
1178	}
1179
1180      tail->u.integer.w = fmt->value;
1181      tail->u.integer.m = -1;
1182
1183      t = format_lex (fmt);
1184      if (t != FMT_PERIOD)
1185	{
1186	  fmt->saved_token = t;
1187	}
1188      else
1189	{
1190	  t = format_lex (fmt);
1191	  if (t != FMT_ZERO && t != FMT_POSINT)
1192	    {
1193	      fmt->error = nonneg_required;
1194	      goto finished;
1195	    }
1196
1197	  tail->u.integer.m = fmt->value;
1198	}
1199
1200      if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1201	{
1202	  fmt->error = "Minimum digits exceeds field width";
1203	  goto finished;
1204	}
1205
1206      break;
1207
1208    default:
1209      fmt->error = unexpected_element;
1210      goto finished;
1211    }
1212
1213  /* Between a descriptor and what comes next */
1214 between_desc:
1215  t = format_lex (fmt);
1216  switch (t)
1217    {
1218    case FMT_COMMA:
1219      goto format_item;
1220
1221    case FMT_RPAREN:
1222      goto finished;
1223
1224    case FMT_SLASH:
1225    case FMT_COLON:
1226      get_fnode (fmt, &head, &tail, t);
1227      tail->repeat = 1;
1228      goto optional_comma;
1229
1230    case FMT_END:
1231      fmt->error = unexpected_end;
1232      goto finished;
1233
1234    default:
1235      /* Assume a missing comma, this is a GNU extension */
1236      goto format_item_1;
1237    }
1238
1239  /* Optional comma is a weird between state where we've just finished
1240     reading a colon, slash or P descriptor. */
1241 optional_comma:
1242  t = format_lex (fmt);
1243  switch (t)
1244    {
1245    case FMT_COMMA:
1246      break;
1247
1248    case FMT_RPAREN:
1249      goto finished;
1250
1251    default:			/* Assume that we have another format item */
1252      fmt->saved_token = t;
1253      break;
1254    }
1255
1256  goto format_item;
1257
1258 finished:
1259
1260  return head;
1261}
1262
1263
1264/* format_error()-- Generate an error message for a format statement.
1265   If the node that gives the location of the error is NULL, the error
1266   is assumed to happen at parse time, and the current location of the
1267   parser is shown.
1268
1269   We generate a message showing where the problem is.  We take extra
1270   care to print only the relevant part of the format if it is longer
1271   than a standard 80 column display. */
1272
1273void
1274format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1275{
1276  int width, i, offset;
1277#define BUFLEN 300
1278  char *p, buffer[BUFLEN];
1279  format_data *fmt = dtp->u.p.fmt;
1280
1281  if (f != NULL)
1282    p = f->source;
1283  else                /* This should not happen.  */
1284    p = dtp->format;
1285
1286  if (message == unexpected_element)
1287    snprintf (buffer, BUFLEN, message, fmt->error_element);
1288  else
1289    snprintf (buffer, BUFLEN, "%s\n", message);
1290
1291  /* Get the offset into the format string where the error occurred.  */
1292  offset = dtp->format_len - (fmt->reversion_ok ?
1293			      (int) strlen(p) : fmt->format_string_len);
1294
1295  width = dtp->format_len;
1296
1297  if (width > 80)
1298    width = 80;
1299
1300  /* Show the format */
1301
1302  p = strchr (buffer, '\0');
1303
1304  if (dtp->format)
1305    memcpy (p, dtp->format, width);
1306
1307  p += width;
1308  *p++ = '\n';
1309
1310  /* Show where the problem is */
1311
1312  for (i = 1; i < offset; i++)
1313    *p++ = ' ';
1314
1315  *p++ = '^';
1316  *p = '\0';
1317
1318  generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1319}
1320
1321
1322/* revert()-- Do reversion of the format.  Control reverts to the left
1323   parenthesis that matches the rightmost right parenthesis.  From our
1324   tree structure, we are looking for the rightmost parenthesis node
1325   at the second level, the first level always being a single
1326   parenthesis node.  If this node doesn't exit, we use the top
1327   level. */
1328
1329static void
1330revert (st_parameter_dt *dtp)
1331{
1332  fnode *f, *r;
1333  format_data *fmt = dtp->u.p.fmt;
1334
1335  dtp->u.p.reversion_flag = 1;
1336
1337  r = NULL;
1338
1339  for (f = fmt->array.array[0].u.child; f; f = f->next)
1340    if (f->format == FMT_LPAREN)
1341      r = f;
1342
1343  /* If r is NULL because no node was found, the whole tree will be used */
1344
1345  fmt->array.array[0].current = r;
1346  fmt->array.array[0].count = 0;
1347}
1348
1349/* parse_format()-- Parse a format string.  */
1350
1351void
1352parse_format (st_parameter_dt *dtp)
1353{
1354  format_data *fmt;
1355  bool format_cache_ok, seen_data_desc = false;
1356
1357  /* Don't cache for internal units and set an arbitrary limit on the
1358     size of format strings we will cache.  (Avoids memory issues.)
1359     Also, the format_hash_table resides in the current_unit, so
1360     child_dtio procedures would overwrite the parent table  */
1361  format_cache_ok = !is_internal_unit (dtp)
1362		    && (dtp->u.p.current_unit->child_dtio == 0);
1363
1364  /* Lookup format string to see if it has already been parsed.  */
1365  if (format_cache_ok)
1366    {
1367      dtp->u.p.fmt = find_parsed_format (dtp);
1368
1369      if (dtp->u.p.fmt != NULL)
1370	{
1371	  dtp->u.p.fmt->reversion_ok = 0;
1372	  dtp->u.p.fmt->saved_token = FMT_NONE;
1373	  dtp->u.p.fmt->saved_format = NULL;
1374	  reset_fnode_counters (dtp);
1375	  return;
1376	}
1377    }
1378
1379  /* Not found so proceed as follows.  */
1380
1381  char *fmt_string = fc_strdup_notrim (dtp->format, dtp->format_len);
1382  dtp->format = fmt_string;
1383
1384  dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data));
1385  fmt->format_string = dtp->format;
1386  fmt->format_string_len = dtp->format_len;
1387
1388  fmt->string = NULL;
1389  fmt->saved_token = FMT_NONE;
1390  fmt->error = NULL;
1391  fmt->value = 0;
1392
1393  /* Initialize variables used during traversal of the tree.  */
1394
1395  fmt->reversion_ok = 0;
1396  fmt->saved_format = NULL;
1397
1398  /* Initialize the fnode_array.  */
1399
1400  memset (&(fmt->array), 0, sizeof(fmt->array));
1401
1402  /* Allocate the first format node as the root of the tree.  */
1403
1404  fmt->last = &fmt->array;
1405  fmt->last->next = NULL;
1406  fmt->avail = &fmt->array.array[0];
1407
1408  memset (fmt->avail, 0, sizeof (*fmt->avail));
1409  fmt->avail->format = FMT_LPAREN;
1410  fmt->avail->repeat = 1;
1411  fmt->avail++;
1412
1413  if (format_lex (fmt) == FMT_LPAREN)
1414    fmt->array.array[0].u.child = parse_format_list (dtp, &seen_data_desc);
1415  else
1416    fmt->error = "Missing initial left parenthesis in format";
1417
1418  if (format_cache_ok)
1419    save_parsed_format (dtp);
1420  else
1421    dtp->u.p.format_not_saved = 1;
1422
1423  if (fmt->error)
1424    format_error (dtp, NULL, fmt->error);
1425}
1426
1427
1428/* next_format0()-- Get the next format node without worrying about
1429   reversion.  Returns NULL when we hit the end of the list.
1430   Parenthesis nodes are incremented after the list has been
1431   exhausted, other nodes are incremented before they are returned. */
1432
1433static const fnode *
1434next_format0 (fnode *f)
1435{
1436  const fnode *r;
1437
1438  if (f == NULL)
1439    return NULL;
1440
1441  if (f->format != FMT_LPAREN)
1442    {
1443      f->count++;
1444      if (f->count <= f->repeat)
1445	return f;
1446
1447      f->count = 0;
1448      return NULL;
1449    }
1450
1451  /* Deal with a parenthesis node with unlimited format.  */
1452
1453  if (f->repeat == -2)  /* -2 signifies unlimited.  */
1454  for (;;)
1455    {
1456      if (f->current == NULL)
1457	f->current = f->u.child;
1458
1459      for (; f->current != NULL; f->current = f->current->next)
1460	{
1461	  r = next_format0 (f->current);
1462	  if (r != NULL)
1463	    return r;
1464	}
1465    }
1466
1467  /* Deal with a parenthesis node with specific repeat count.  */
1468  for (; f->count < f->repeat; f->count++)
1469    {
1470      if (f->current == NULL)
1471	f->current = f->u.child;
1472
1473      for (; f->current != NULL; f->current = f->current->next)
1474	{
1475	  r = next_format0 (f->current);
1476	  if (r != NULL)
1477	    return r;
1478	}
1479    }
1480
1481  f->count = 0;
1482  return NULL;
1483}
1484
1485
1486/* next_format()-- Return the next format node.  If the format list
1487   ends up being exhausted, we do reversion.  Reversion is only
1488   allowed if we've seen a data descriptor since the
1489   initialization or the last reversion.  We return NULL if there
1490   are no more data descriptors to return (which is an error
1491   condition).  */
1492
1493const fnode *
1494next_format (st_parameter_dt *dtp)
1495{
1496  format_token t;
1497  const fnode *f;
1498  format_data *fmt = dtp->u.p.fmt;
1499
1500  if (fmt->saved_format != NULL)
1501    {				/* Deal with a pushed-back format node */
1502      f = fmt->saved_format;
1503      fmt->saved_format = NULL;
1504      goto done;
1505    }
1506
1507  f = next_format0 (&fmt->array.array[0]);
1508  if (f == NULL)
1509    {
1510      if (!fmt->reversion_ok)
1511	return NULL;
1512
1513      fmt->reversion_ok = 0;
1514      revert (dtp);
1515
1516      f = next_format0 (&fmt->array.array[0]);
1517      if (f == NULL)
1518	{
1519	  format_error (dtp, NULL, reversion_error);
1520	  return NULL;
1521	}
1522
1523      /* Push the first reverted token and return a colon node in case
1524	 there are no more data items.  */
1525
1526      fmt->saved_format = f;
1527      return &colon_node;
1528    }
1529
1530  /* If this is a data edit descriptor, then reversion has become OK. */
1531 done:
1532  t = f->format;
1533
1534  if (!fmt->reversion_ok &&
1535      (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1536       t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1537       t == FMT_A || t == FMT_D || t == FMT_DT))
1538    fmt->reversion_ok = 1;
1539  return f;
1540}
1541
1542
1543/* unget_format()-- Push the given format back so that it will be
1544   returned on the next call to next_format() without affecting
1545   counts.  This is necessary when we've encountered a data
1546   descriptor, but don't know what the data item is yet.  The format
1547   node is pushed back, and we return control to the main program,
1548   which calls the library back with the data item (or not). */
1549
1550void
1551unget_format (st_parameter_dt *dtp, const fnode *f)
1552{
1553  dtp->u.p.fmt->saved_format = f;
1554}
1555
1556