1/* Copyright (C) 2002-2022 Free Software Foundation, Inc. 2 Contributed by Andy Vaught 3 F2003 I/O support contributed by Jerry DeLisle 4 5This file is part of the GNU Fortran runtime library (libgfortran). 6 7Libgfortran is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 3, or (at your option) 10any later version. 11 12Libgfortran is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17Under Section 7 of GPL version 3, you are granted additional 18permissions described in the GCC Runtime Library Exception, version 193.1, as published by the Free Software Foundation. 20 21You should have received a copy of the GNU General Public License and 22a copy of the GCC Runtime Library Exception along with this program; 23see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24<http://www.gnu.org/licenses/>. */ 25 26 27/* format.c-- parse a FORMAT string into a binary format suitable for 28 interpretation during I/O statements. */ 29 30#include "io.h" 31#include "format.h" 32#include <string.h> 33 34 35static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0, 36 NULL }; 37 38/* Error messages. */ 39 40static const char posint_required[] = "Positive integer required in format", 41 period_required[] = "Period required in format", 42 nonneg_required[] = "Nonnegative width required in format", 43 unexpected_element[] = "Unexpected element '%c' in format\n", 44 unexpected_end[] = "Unexpected end of format string", 45 bad_string[] = "Unterminated character constant in format", 46 bad_hollerith[] = "Hollerith constant extends past the end of the format", 47 reversion_error[] = "Exhausted data descriptors in format", 48 zero_width[] = "Zero width in format descriptor"; 49 50/* The following routines support caching format data from parsed format strings 51 into a hash table. This avoids repeatedly parsing duplicate format strings 52 or format strings in I/O statements that are repeated in loops. */ 53 54 55/* Traverse the table and free all data. */ 56 57void 58free_format_hash_table (gfc_unit *u) 59{ 60 size_t i; 61 62 /* free_format_data handles any NULL pointers. */ 63 for (i = 0; i < FORMAT_HASH_SIZE; i++) 64 { 65 if (u->format_hash_table[i].hashed_fmt != NULL) 66 { 67 free_format_data (u->format_hash_table[i].hashed_fmt); 68 free (u->format_hash_table[i].key); 69 } 70 u->format_hash_table[i].key = NULL; 71 u->format_hash_table[i].key_len = 0; 72 u->format_hash_table[i].hashed_fmt = NULL; 73 } 74} 75 76/* Traverse the format_data structure and reset the fnode counters. */ 77 78static void 79reset_node (fnode *fn) 80{ 81 fnode *f; 82 83 fn->count = 0; 84 fn->current = NULL; 85 86 if (fn->format != FMT_LPAREN) 87 return; 88 89 for (f = fn->u.child; f; f = f->next) 90 { 91 if (f->format == FMT_RPAREN) 92 break; 93 reset_node (f); 94 } 95} 96 97static void 98reset_fnode_counters (st_parameter_dt *dtp) 99{ 100 fnode *f; 101 format_data *fmt; 102 103 fmt = dtp->u.p.fmt; 104 105 /* Clear this pointer at the head so things start at the right place. */ 106 fmt->array.array[0].current = NULL; 107 108 for (f = fmt->array.array[0].u.child; f; f = f->next) 109 reset_node (f); 110} 111 112 113/* A simple hashing function to generate an index into the hash table. */ 114 115static uint32_t 116format_hash (st_parameter_dt *dtp) 117{ 118 char *key; 119 gfc_charlen_type key_len; 120 uint32_t hash = 0; 121 gfc_charlen_type i; 122 123 /* Hash the format string. Super simple, but what the heck! */ 124 key = dtp->format; 125 key_len = dtp->format_len; 126 for (i = 0; i < key_len; i++) 127 hash ^= key[i]; 128 hash &= (FORMAT_HASH_SIZE - 1); 129 return hash; 130} 131 132 133static void 134save_parsed_format (st_parameter_dt *dtp) 135{ 136 uint32_t hash; 137 gfc_unit *u; 138 139 hash = format_hash (dtp); 140 u = dtp->u.p.current_unit; 141 142 /* Index into the hash table. We are simply replacing whatever is there 143 relying on probability. */ 144 if (u->format_hash_table[hash].hashed_fmt != NULL) 145 free_format_data (u->format_hash_table[hash].hashed_fmt); 146 u->format_hash_table[hash].hashed_fmt = NULL; 147 148 free (u->format_hash_table[hash].key); 149 u->format_hash_table[hash].key = dtp->format; 150 151 u->format_hash_table[hash].key_len = dtp->format_len; 152 u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt; 153} 154 155 156static format_data * 157find_parsed_format (st_parameter_dt *dtp) 158{ 159 uint32_t hash; 160 gfc_unit *u; 161 162 hash = format_hash (dtp); 163 u = dtp->u.p.current_unit; 164 165 if (u->format_hash_table[hash].key != NULL) 166 { 167 /* See if it matches. */ 168 if (u->format_hash_table[hash].key_len == dtp->format_len) 169 { 170 /* So far so good. */ 171 if (strncmp (u->format_hash_table[hash].key, 172 dtp->format, dtp->format_len) == 0) 173 return u->format_hash_table[hash].hashed_fmt; 174 } 175 } 176 return NULL; 177} 178 179 180/* next_char()-- Return the next character in the format string. 181 Returns -1 when the string is done. If the literal flag is set, 182 spaces are significant, otherwise they are not. */ 183 184static int 185next_char (format_data *fmt, int literal) 186{ 187 int c; 188 189 do 190 { 191 if (fmt->format_string_len == 0) 192 return -1; 193 194 fmt->format_string_len--; 195 c = safe_toupper (*fmt->format_string++); 196 fmt->error_element = c; 197 } 198 while ((c == ' ' || c == '\t') && !literal); 199 200 return c; 201} 202 203 204/* unget_char()-- Back up one character position. */ 205 206#define unget_char(fmt) \ 207 { fmt->format_string--; fmt->format_string_len++; } 208 209 210/* get_fnode()-- Allocate a new format node, inserting it into the 211 current singly linked list. These are initially allocated from the 212 static buffer. */ 213 214static fnode * 215get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t) 216{ 217 fnode *f; 218 219 if (fmt->avail == &fmt->last->array[FARRAY_SIZE]) 220 { 221 fmt->last->next = xmalloc (sizeof (fnode_array)); 222 fmt->last = fmt->last->next; 223 fmt->last->next = NULL; 224 fmt->avail = &fmt->last->array[0]; 225 } 226 f = fmt->avail++; 227 memset (f, '\0', sizeof (fnode)); 228 229 if (*head == NULL) 230 *head = *tail = f; 231 else 232 { 233 (*tail)->next = f; 234 *tail = f; 235 } 236 237 f->format = t; 238 f->repeat = -1; 239 f->source = fmt->format_string; 240 return f; 241} 242 243 244/* free_format()-- Free allocated format string. */ 245void 246free_format (st_parameter_dt *dtp) 247{ 248 if ((dtp->common.flags & IOPARM_DT_HAS_FORMAT) && dtp->format) 249 { 250 free (dtp->format); 251 dtp->format = NULL; 252 } 253} 254 255 256/* free_format_data()-- Free all allocated format data. */ 257 258void 259free_format_data (format_data *fmt) 260{ 261 fnode_array *fa, *fa_next; 262 fnode *fnp; 263 264 if (fmt == NULL) 265 return; 266 267 /* Free vlist descriptors in the fnode_array if one was allocated. */ 268 for (fnp = fmt->array.array; fnp < &fmt->array.array[FARRAY_SIZE] && 269 fnp->format != FMT_NONE; fnp++) 270 if (fnp->format == FMT_DT) 271 { 272 if (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist)) 273 free (GFC_DESCRIPTOR_DATA(fnp->u.udf.vlist)); 274 free (fnp->u.udf.vlist); 275 } 276 277 for (fa = fmt->array.next; fa; fa = fa_next) 278 { 279 fa_next = fa->next; 280 free (fa); 281 } 282 283 free (fmt); 284 fmt = NULL; 285} 286 287 288/* format_lex()-- Simple lexical analyzer for getting the next token 289 in a FORMAT string. We support a one-level token pushback in the 290 fmt->saved_token variable. */ 291 292static format_token 293format_lex (format_data *fmt) 294{ 295 format_token token; 296 int negative_flag; 297 int c; 298 char delim; 299 300 if (fmt->saved_token != FMT_NONE) 301 { 302 token = fmt->saved_token; 303 fmt->saved_token = FMT_NONE; 304 return token; 305 } 306 307 negative_flag = 0; 308 c = next_char (fmt, 0); 309 310 switch (c) 311 { 312 case '*': 313 token = FMT_STAR; 314 break; 315 316 case '(': 317 token = FMT_LPAREN; 318 break; 319 320 case ')': 321 token = FMT_RPAREN; 322 break; 323 324 case '-': 325 negative_flag = 1; 326 /* Fall Through */ 327 328 case '+': 329 c = next_char (fmt, 0); 330 if (!safe_isdigit (c)) 331 { 332 token = FMT_UNKNOWN; 333 break; 334 } 335 336 fmt->value = c - '0'; 337 338 for (;;) 339 { 340 c = next_char (fmt, 0); 341 if (!safe_isdigit (c)) 342 break; 343 344 fmt->value = 10 * fmt->value + c - '0'; 345 } 346 347 unget_char (fmt); 348 349 if (negative_flag) 350 fmt->value = -fmt->value; 351 token = FMT_SIGNED_INT; 352 break; 353 354 case '0': 355 case '1': 356 case '2': 357 case '3': 358 case '4': 359 case '5': 360 case '6': 361 case '7': 362 case '8': 363 case '9': 364 fmt->value = c - '0'; 365 366 for (;;) 367 { 368 c = next_char (fmt, 0); 369 if (!safe_isdigit (c)) 370 break; 371 372 fmt->value = 10 * fmt->value + c - '0'; 373 } 374 375 unget_char (fmt); 376 token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT; 377 break; 378 379 case '.': 380 token = FMT_PERIOD; 381 break; 382 383 case ',': 384 token = FMT_COMMA; 385 break; 386 387 case ':': 388 token = FMT_COLON; 389 break; 390 391 case '/': 392 token = FMT_SLASH; 393 break; 394 395 case '$': 396 token = FMT_DOLLAR; 397 break; 398 399 case 'T': 400 switch (next_char (fmt, 0)) 401 { 402 case 'L': 403 token = FMT_TL; 404 break; 405 case 'R': 406 token = FMT_TR; 407 break; 408 default: 409 token = FMT_T; 410 unget_char (fmt); 411 break; 412 } 413 414 break; 415 416 case 'X': 417 token = FMT_X; 418 break; 419 420 case 'S': 421 switch (next_char (fmt, 0)) 422 { 423 case 'S': 424 token = FMT_SS; 425 break; 426 case 'P': 427 token = FMT_SP; 428 break; 429 default: 430 token = FMT_S; 431 unget_char (fmt); 432 break; 433 } 434 435 break; 436 437 case 'B': 438 switch (next_char (fmt, 0)) 439 { 440 case 'N': 441 token = FMT_BN; 442 break; 443 case 'Z': 444 token = FMT_BZ; 445 break; 446 default: 447 token = FMT_B; 448 unget_char (fmt); 449 break; 450 } 451 452 break; 453 454 case '\'': 455 case '"': 456 delim = c; 457 458 fmt->string = fmt->format_string; 459 fmt->value = 0; /* This is the length of the string */ 460 461 for (;;) 462 { 463 c = next_char (fmt, 1); 464 if (c == -1) 465 { 466 token = FMT_BADSTRING; 467 fmt->error = bad_string; 468 break; 469 } 470 471 if (c == delim) 472 { 473 c = next_char (fmt, 1); 474 475 if (c == -1) 476 { 477 token = FMT_BADSTRING; 478 fmt->error = bad_string; 479 break; 480 } 481 482 if (c != delim) 483 { 484 unget_char (fmt); 485 token = FMT_STRING; 486 break; 487 } 488 } 489 490 fmt->value++; 491 } 492 493 break; 494 495 case 'P': 496 token = FMT_P; 497 break; 498 499 case 'I': 500 token = FMT_I; 501 break; 502 503 case 'O': 504 token = FMT_O; 505 break; 506 507 case 'Z': 508 token = FMT_Z; 509 break; 510 511 case 'F': 512 token = FMT_F; 513 break; 514 515 case 'E': 516 switch (next_char (fmt, 0)) 517 { 518 case 'N': 519 token = FMT_EN; 520 break; 521 case 'S': 522 token = FMT_ES; 523 break; 524 default: 525 token = FMT_E; 526 unget_char (fmt); 527 break; 528 } 529 break; 530 531 case 'G': 532 token = FMT_G; 533 break; 534 535 case 'H': 536 token = FMT_H; 537 break; 538 539 case 'L': 540 token = FMT_L; 541 break; 542 543 case 'A': 544 token = FMT_A; 545 break; 546 547 case 'D': 548 switch (next_char (fmt, 0)) 549 { 550 case 'P': 551 token = FMT_DP; 552 break; 553 case 'C': 554 token = FMT_DC; 555 break; 556 case 'T': 557 token = FMT_DT; 558 break; 559 default: 560 token = FMT_D; 561 unget_char (fmt); 562 break; 563 } 564 break; 565 566 case 'R': 567 switch (next_char (fmt, 0)) 568 { 569 case 'C': 570 token = FMT_RC; 571 break; 572 case 'D': 573 token = FMT_RD; 574 break; 575 case 'N': 576 token = FMT_RN; 577 break; 578 case 'P': 579 token = FMT_RP; 580 break; 581 case 'U': 582 token = FMT_RU; 583 break; 584 case 'Z': 585 token = FMT_RZ; 586 break; 587 default: 588 unget_char (fmt); 589 token = FMT_UNKNOWN; 590 break; 591 } 592 break; 593 594 case -1: 595 token = FMT_END; 596 break; 597 598 default: 599 token = FMT_UNKNOWN; 600 break; 601 } 602 603 return token; 604} 605 606 607/* parse_format_list()-- Parse a format list. Assumes that a left 608 paren has already been seen. Returns a list representing the 609 parenthesis node which contains the rest of the list. */ 610 611static fnode * 612parse_format_list (st_parameter_dt *dtp, bool *seen_dd) 613{ 614 fnode *head, *tail; 615 format_token t, u, t2; 616 int repeat; 617 format_data *fmt = dtp->u.p.fmt; 618 bool seen_data_desc = false; 619 int standard; 620 621 head = tail = NULL; 622 623 /* Get the next format item */ 624 format_item: 625 t = format_lex (fmt); 626 format_item_1: 627 switch (t) 628 { 629 case FMT_STAR: 630 t = format_lex (fmt); 631 if (t != FMT_LPAREN) 632 { 633 fmt->error = "Left parenthesis required after '*'"; 634 goto finished; 635 } 636 get_fnode (fmt, &head, &tail, FMT_LPAREN); 637 tail->repeat = -2; /* Signifies unlimited format. */ 638 tail->u.child = parse_format_list (dtp, &seen_data_desc); 639 *seen_dd = seen_data_desc; 640 if (fmt->error != NULL) 641 goto finished; 642 if (!seen_data_desc) 643 { 644 fmt->error = "'*' requires at least one associated data descriptor"; 645 goto finished; 646 } 647 goto between_desc; 648 649 case FMT_POSINT: 650 repeat = fmt->value; 651 652 t = format_lex (fmt); 653 switch (t) 654 { 655 case FMT_LPAREN: 656 get_fnode (fmt, &head, &tail, FMT_LPAREN); 657 tail->repeat = repeat; 658 tail->u.child = parse_format_list (dtp, &seen_data_desc); 659 *seen_dd = seen_data_desc; 660 if (fmt->error != NULL) 661 goto finished; 662 663 goto between_desc; 664 665 case FMT_SLASH: 666 get_fnode (fmt, &head, &tail, FMT_SLASH); 667 tail->repeat = repeat; 668 goto optional_comma; 669 670 case FMT_X: 671 get_fnode (fmt, &head, &tail, FMT_X); 672 tail->repeat = 1; 673 tail->u.k = fmt->value; 674 goto between_desc; 675 676 case FMT_P: 677 goto p_descriptor; 678 679 default: 680 goto data_desc; 681 } 682 683 case FMT_LPAREN: 684 get_fnode (fmt, &head, &tail, FMT_LPAREN); 685 tail->repeat = 1; 686 tail->u.child = parse_format_list (dtp, &seen_data_desc); 687 *seen_dd = seen_data_desc; 688 if (fmt->error != NULL) 689 goto finished; 690 691 goto between_desc; 692 693 case FMT_SIGNED_INT: /* Signed integer can only precede a P format. */ 694 case FMT_ZERO: /* Same for zero. */ 695 t = format_lex (fmt); 696 if (t != FMT_P) 697 { 698 fmt->error = "Expected P edit descriptor in format"; 699 goto finished; 700 } 701 702 p_descriptor: 703 get_fnode (fmt, &head, &tail, FMT_P); 704 tail->u.k = fmt->value; 705 tail->repeat = 1; 706 707 t = format_lex (fmt); 708 if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D 709 || t == FMT_G || t == FMT_E) 710 { 711 repeat = 1; 712 goto data_desc; 713 } 714 715 if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH 716 && t != FMT_POSINT) 717 { 718 fmt->error = "Comma required after P descriptor"; 719 goto finished; 720 } 721 722 fmt->saved_token = t; 723 goto optional_comma; 724 725 case FMT_P: /* P and X require a prior number */ 726 fmt->error = "P descriptor requires leading scale factor"; 727 goto finished; 728 729 case FMT_X: 730/* 731 EXTENSION! 732 733 If we would be pedantic in the library, we would have to reject 734 an X descriptor without an integer prefix: 735 736 fmt->error = "X descriptor requires leading space count"; 737 goto finished; 738 739 However, this is an extension supported by many Fortran compilers, 740 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the 741 runtime library, and make the front end reject it if the compiler 742 is in pedantic mode. The interpretation of 'X' is '1X'. 743*/ 744 get_fnode (fmt, &head, &tail, FMT_X); 745 tail->repeat = 1; 746 tail->u.k = 1; 747 goto between_desc; 748 749 case FMT_STRING: 750 get_fnode (fmt, &head, &tail, FMT_STRING); 751 tail->u.string.p = fmt->string; 752 tail->u.string.length = fmt->value; 753 tail->repeat = 1; 754 goto optional_comma; 755 756 case FMT_RC: 757 case FMT_RD: 758 case FMT_RN: 759 case FMT_RP: 760 case FMT_RU: 761 case FMT_RZ: 762 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round " 763 "descriptor not allowed"); 764 get_fnode (fmt, &head, &tail, t); 765 tail->repeat = 1; 766 goto between_desc; 767 768 case FMT_DC: 769 case FMT_DP: 770 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP " 771 "descriptor not allowed"); 772 /* Fall through. */ 773 case FMT_S: 774 case FMT_SS: 775 case FMT_SP: 776 case FMT_BN: 777 case FMT_BZ: 778 get_fnode (fmt, &head, &tail, t); 779 tail->repeat = 1; 780 goto between_desc; 781 782 case FMT_COLON: 783 get_fnode (fmt, &head, &tail, FMT_COLON); 784 tail->repeat = 1; 785 goto optional_comma; 786 787 case FMT_SLASH: 788 get_fnode (fmt, &head, &tail, FMT_SLASH); 789 tail->repeat = 1; 790 tail->u.r = 1; 791 goto optional_comma; 792 793 case FMT_DOLLAR: 794 get_fnode (fmt, &head, &tail, FMT_DOLLAR); 795 tail->repeat = 1; 796 notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor"); 797 goto between_desc; 798 799 case FMT_T: 800 case FMT_TL: 801 case FMT_TR: 802 t2 = format_lex (fmt); 803 if (t2 != FMT_POSINT) 804 { 805 fmt->error = posint_required; 806 goto finished; 807 } 808 get_fnode (fmt, &head, &tail, t); 809 tail->u.n = fmt->value; 810 tail->repeat = 1; 811 goto between_desc; 812 813 case FMT_I: 814 case FMT_B: 815 case FMT_O: 816 case FMT_Z: 817 case FMT_E: 818 case FMT_EN: 819 case FMT_ES: 820 case FMT_D: 821 case FMT_DT: 822 case FMT_L: 823 case FMT_A: 824 case FMT_F: 825 case FMT_G: 826 repeat = 1; 827 *seen_dd = true; 828 goto data_desc; 829 830 case FMT_H: 831 get_fnode (fmt, &head, &tail, FMT_STRING); 832 if (fmt->format_string_len < 1) 833 { 834 fmt->error = bad_hollerith; 835 goto finished; 836 } 837 838 tail->u.string.p = fmt->format_string; 839 tail->u.string.length = 1; 840 tail->repeat = 1; 841 842 fmt->format_string++; 843 fmt->format_string_len--; 844 845 goto between_desc; 846 847 case FMT_END: 848 fmt->error = unexpected_end; 849 goto finished; 850 851 case FMT_BADSTRING: 852 goto finished; 853 854 case FMT_RPAREN: 855 goto finished; 856 857 default: 858 fmt->error = unexpected_element; 859 goto finished; 860 } 861 862 /* In this state, t must currently be a data descriptor. Deal with 863 things that can/must follow the descriptor */ 864 data_desc: 865 866 switch (t) 867 { 868 case FMT_L: 869 *seen_dd = true; 870 t = format_lex (fmt); 871 if (t != FMT_POSINT) 872 { 873 if (t == FMT_ZERO) 874 { 875 if (notification_std(GFC_STD_GNU) == NOTIFICATION_ERROR) 876 { 877 fmt->error = "Extension: Zero width after L descriptor"; 878 goto finished; 879 } 880 else 881 notify_std (&dtp->common, GFC_STD_GNU, 882 "Zero width after L descriptor"); 883 } 884 else 885 { 886 fmt->saved_token = t; 887 notify_std (&dtp->common, GFC_STD_GNU, 888 "Positive width required with L descriptor"); 889 } 890 fmt->value = 1; /* Default width */ 891 } 892 get_fnode (fmt, &head, &tail, FMT_L); 893 tail->u.n = fmt->value; 894 tail->repeat = repeat; 895 break; 896 897 case FMT_A: 898 *seen_dd = true; 899 t = format_lex (fmt); 900 if (t == FMT_ZERO) 901 { 902 fmt->error = zero_width; 903 goto finished; 904 } 905 906 if (t != FMT_POSINT) 907 { 908 fmt->saved_token = t; 909 fmt->value = -1; /* Width not present */ 910 } 911 912 get_fnode (fmt, &head, &tail, FMT_A); 913 tail->repeat = repeat; 914 tail->u.n = fmt->value; 915 break; 916 917 case FMT_D: 918 case FMT_E: 919 case FMT_F: 920 case FMT_G: 921 case FMT_EN: 922 case FMT_ES: 923 *seen_dd = true; 924 get_fnode (fmt, &head, &tail, t); 925 tail->repeat = repeat; 926 927 u = format_lex (fmt); 928 929 /* Processing for zero width formats. */ 930 if (u == FMT_ZERO) 931 { 932 if (t == FMT_F) 933 standard = GFC_STD_F95; 934 else if (t == FMT_G) 935 standard = GFC_STD_F2008; 936 else 937 standard = GFC_STD_F2018; 938 939 if (notification_std (standard) == NOTIFICATION_ERROR 940 || dtp->u.p.mode == READING) 941 { 942 fmt->error = zero_width; 943 goto finished; 944 } 945 tail->u.real.w = 0; 946 947 /* Look for the dot seperator. */ 948 u = format_lex (fmt); 949 if (u != FMT_PERIOD) 950 { 951 fmt->saved_token = u; 952 break; 953 } 954 955 /* Look for the precision. */ 956 u = format_lex (fmt); 957 if (u != FMT_ZERO && u != FMT_POSINT) 958 { 959 fmt->error = nonneg_required; 960 goto finished; 961 } 962 tail->u.real.d = fmt->value; 963 964 /* Look for optional exponent, not allowed for FMT_D */ 965 if (t == FMT_D) 966 break; 967 u = format_lex (fmt); 968 if (u != FMT_E) 969 fmt->saved_token = u; 970 else 971 { 972 u = format_lex (fmt); 973 if (u != FMT_POSINT) 974 { 975 if (u == FMT_ZERO) 976 { 977 notify_std (&dtp->common, GFC_STD_F2018, 978 "Positive exponent width required"); 979 } 980 else 981 { 982 fmt->error = "Positive exponent width required in " 983 "format string at %L"; 984 goto finished; 985 } 986 } 987 tail->u.real.e = fmt->value; 988 } 989 break; 990 } 991 992 /* Processing for positive width formats. */ 993 if (u == FMT_POSINT) 994 { 995 tail->u.real.w = fmt->value; 996 997 /* Look for the dot separator. Because of legacy behaviors 998 we do some look ahead for missing things. */ 999 t2 = t; 1000 t = format_lex (fmt); 1001 if (t != FMT_PERIOD) 1002 { 1003 /* We treat a missing decimal descriptor as 0. Note: This is only 1004 allowed if -std=legacy, otherwise an error occurs. */ 1005 if (compile_options.warn_std != 0) 1006 { 1007 fmt->error = period_required; 1008 goto finished; 1009 } 1010 fmt->saved_token = t; 1011 tail->u.real.d = 0; 1012 tail->u.real.e = -1; 1013 break; 1014 } 1015 1016 /* If we made it here, we should have the dot so look for the 1017 precision. */ 1018 t = format_lex (fmt); 1019 if (t != FMT_ZERO && t != FMT_POSINT) 1020 { 1021 fmt->error = nonneg_required; 1022 goto finished; 1023 } 1024 tail->u.real.d = fmt->value; 1025 tail->u.real.e = -1; 1026 1027 /* Done with D and F formats. */ 1028 if (t2 == FMT_D || t2 == FMT_F) 1029 { 1030 *seen_dd = true; 1031 break; 1032 } 1033 1034 /* Look for optional exponent */ 1035 u = format_lex (fmt); 1036 if (u != FMT_E) 1037 fmt->saved_token = u; 1038 else 1039 { 1040 u = format_lex (fmt); 1041 if (u != FMT_POSINT) 1042 { 1043 if (u == FMT_ZERO) 1044 { 1045 notify_std (&dtp->common, GFC_STD_F2018, 1046 "Positive exponent width required"); 1047 } 1048 else 1049 { 1050 fmt->error = "Positive exponent width required in " 1051 "format string at %L"; 1052 goto finished; 1053 } 1054 } 1055 tail->u.real.e = fmt->value; 1056 } 1057 break; 1058 } 1059 1060 /* Old DEC codes may not have width or precision specified. */ 1061 if (dtp->u.p.mode == WRITING && (dtp->common.flags & IOPARM_DT_DEC_EXT)) 1062 { 1063 tail->u.real.w = DEFAULT_WIDTH; 1064 tail->u.real.d = 0; 1065 tail->u.real.e = -1; 1066 fmt->saved_token = u; 1067 } 1068 break; 1069 1070 case FMT_DT: 1071 *seen_dd = true; 1072 get_fnode (fmt, &head, &tail, t); 1073 tail->repeat = repeat; 1074 1075 t = format_lex (fmt); 1076 1077 /* Initialize the vlist to a zero size, rank-one array. */ 1078 tail->u.udf.vlist= xmalloc (sizeof(gfc_array_i4) 1079 + sizeof (descriptor_dimension)); 1080 GFC_DESCRIPTOR_DATA(tail->u.udf.vlist) = NULL; 1081 GFC_DIMENSION_SET(tail->u.udf.vlist->dim[0],1, 0, 0); 1082 1083 if (t == FMT_STRING) 1084 { 1085 /* Get pointer to the optional format string. */ 1086 tail->u.udf.string = fmt->string; 1087 tail->u.udf.string_len = fmt->value; 1088 t = format_lex (fmt); 1089 } 1090 if (t == FMT_LPAREN) 1091 { 1092 /* Temporary buffer to hold the vlist values. */ 1093 GFC_INTEGER_4 temp[FARRAY_SIZE]; 1094 int i = 0; 1095 loop: 1096 t = format_lex (fmt); 1097 if (t != FMT_POSINT) 1098 { 1099 fmt->error = posint_required; 1100 goto finished; 1101 } 1102 /* Save the positive integer value. */ 1103 temp[i++] = fmt->value; 1104 t = format_lex (fmt); 1105 if (t == FMT_COMMA) 1106 goto loop; 1107 if (t == FMT_RPAREN) 1108 { 1109 /* We have parsed the complete vlist so initialize the 1110 array descriptor and save it in the format node. */ 1111 gfc_full_array_i4 *vp = tail->u.udf.vlist; 1112 GFC_DESCRIPTOR_DATA(vp) = xmalloc (i * sizeof(GFC_INTEGER_4)); 1113 GFC_DIMENSION_SET(vp->dim[0],1, i, 1); 1114 memcpy (GFC_DESCRIPTOR_DATA(vp), temp, i * sizeof(GFC_INTEGER_4)); 1115 break; 1116 } 1117 fmt->error = unexpected_element; 1118 goto finished; 1119 } 1120 fmt->saved_token = t; 1121 break; 1122 case FMT_H: 1123 if (repeat > fmt->format_string_len) 1124 { 1125 fmt->error = bad_hollerith; 1126 goto finished; 1127 } 1128 1129 get_fnode (fmt, &head, &tail, FMT_STRING); 1130 tail->u.string.p = fmt->format_string; 1131 tail->u.string.length = repeat; 1132 tail->repeat = 1; 1133 1134 fmt->format_string += fmt->value; 1135 fmt->format_string_len -= repeat; 1136 1137 break; 1138 1139 case FMT_I: 1140 case FMT_B: 1141 case FMT_O: 1142 case FMT_Z: 1143 *seen_dd = true; 1144 get_fnode (fmt, &head, &tail, t); 1145 tail->repeat = repeat; 1146 1147 t = format_lex (fmt); 1148 1149 if (dtp->u.p.mode == READING) 1150 { 1151 if (t != FMT_POSINT) 1152 { 1153 if (dtp->common.flags & IOPARM_DT_DEC_EXT) 1154 { 1155 tail->u.integer.w = DEFAULT_WIDTH; 1156 tail->u.integer.m = -1; 1157 fmt->saved_token = t; 1158 break; 1159 } 1160 fmt->error = posint_required; 1161 goto finished; 1162 } 1163 } 1164 else 1165 { 1166 if (t != FMT_ZERO && t != FMT_POSINT) 1167 { 1168 if (dtp->common.flags & IOPARM_DT_DEC_EXT) 1169 { 1170 tail->u.integer.w = DEFAULT_WIDTH; 1171 tail->u.integer.m = -1; 1172 fmt->saved_token = t; 1173 break; 1174 } 1175 fmt->error = nonneg_required; 1176 goto finished; 1177 } 1178 } 1179 1180 tail->u.integer.w = fmt->value; 1181 tail->u.integer.m = -1; 1182 1183 t = format_lex (fmt); 1184 if (t != FMT_PERIOD) 1185 { 1186 fmt->saved_token = t; 1187 } 1188 else 1189 { 1190 t = format_lex (fmt); 1191 if (t != FMT_ZERO && t != FMT_POSINT) 1192 { 1193 fmt->error = nonneg_required; 1194 goto finished; 1195 } 1196 1197 tail->u.integer.m = fmt->value; 1198 } 1199 1200 if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w) 1201 { 1202 fmt->error = "Minimum digits exceeds field width"; 1203 goto finished; 1204 } 1205 1206 break; 1207 1208 default: 1209 fmt->error = unexpected_element; 1210 goto finished; 1211 } 1212 1213 /* Between a descriptor and what comes next */ 1214 between_desc: 1215 t = format_lex (fmt); 1216 switch (t) 1217 { 1218 case FMT_COMMA: 1219 goto format_item; 1220 1221 case FMT_RPAREN: 1222 goto finished; 1223 1224 case FMT_SLASH: 1225 case FMT_COLON: 1226 get_fnode (fmt, &head, &tail, t); 1227 tail->repeat = 1; 1228 goto optional_comma; 1229 1230 case FMT_END: 1231 fmt->error = unexpected_end; 1232 goto finished; 1233 1234 default: 1235 /* Assume a missing comma, this is a GNU extension */ 1236 goto format_item_1; 1237 } 1238 1239 /* Optional comma is a weird between state where we've just finished 1240 reading a colon, slash or P descriptor. */ 1241 optional_comma: 1242 t = format_lex (fmt); 1243 switch (t) 1244 { 1245 case FMT_COMMA: 1246 break; 1247 1248 case FMT_RPAREN: 1249 goto finished; 1250 1251 default: /* Assume that we have another format item */ 1252 fmt->saved_token = t; 1253 break; 1254 } 1255 1256 goto format_item; 1257 1258 finished: 1259 1260 return head; 1261} 1262 1263 1264/* format_error()-- Generate an error message for a format statement. 1265 If the node that gives the location of the error is NULL, the error 1266 is assumed to happen at parse time, and the current location of the 1267 parser is shown. 1268 1269 We generate a message showing where the problem is. We take extra 1270 care to print only the relevant part of the format if it is longer 1271 than a standard 80 column display. */ 1272 1273void 1274format_error (st_parameter_dt *dtp, const fnode *f, const char *message) 1275{ 1276 int width, i, offset; 1277#define BUFLEN 300 1278 char *p, buffer[BUFLEN]; 1279 format_data *fmt = dtp->u.p.fmt; 1280 1281 if (f != NULL) 1282 p = f->source; 1283 else /* This should not happen. */ 1284 p = dtp->format; 1285 1286 if (message == unexpected_element) 1287 snprintf (buffer, BUFLEN, message, fmt->error_element); 1288 else 1289 snprintf (buffer, BUFLEN, "%s\n", message); 1290 1291 /* Get the offset into the format string where the error occurred. */ 1292 offset = dtp->format_len - (fmt->reversion_ok ? 1293 (int) strlen(p) : fmt->format_string_len); 1294 1295 width = dtp->format_len; 1296 1297 if (width > 80) 1298 width = 80; 1299 1300 /* Show the format */ 1301 1302 p = strchr (buffer, '\0'); 1303 1304 if (dtp->format) 1305 memcpy (p, dtp->format, width); 1306 1307 p += width; 1308 *p++ = '\n'; 1309 1310 /* Show where the problem is */ 1311 1312 for (i = 1; i < offset; i++) 1313 *p++ = ' '; 1314 1315 *p++ = '^'; 1316 *p = '\0'; 1317 1318 generate_error (&dtp->common, LIBERROR_FORMAT, buffer); 1319} 1320 1321 1322/* revert()-- Do reversion of the format. Control reverts to the left 1323 parenthesis that matches the rightmost right parenthesis. From our 1324 tree structure, we are looking for the rightmost parenthesis node 1325 at the second level, the first level always being a single 1326 parenthesis node. If this node doesn't exit, we use the top 1327 level. */ 1328 1329static void 1330revert (st_parameter_dt *dtp) 1331{ 1332 fnode *f, *r; 1333 format_data *fmt = dtp->u.p.fmt; 1334 1335 dtp->u.p.reversion_flag = 1; 1336 1337 r = NULL; 1338 1339 for (f = fmt->array.array[0].u.child; f; f = f->next) 1340 if (f->format == FMT_LPAREN) 1341 r = f; 1342 1343 /* If r is NULL because no node was found, the whole tree will be used */ 1344 1345 fmt->array.array[0].current = r; 1346 fmt->array.array[0].count = 0; 1347} 1348 1349/* parse_format()-- Parse a format string. */ 1350 1351void 1352parse_format (st_parameter_dt *dtp) 1353{ 1354 format_data *fmt; 1355 bool format_cache_ok, seen_data_desc = false; 1356 1357 /* Don't cache for internal units and set an arbitrary limit on the 1358 size of format strings we will cache. (Avoids memory issues.) 1359 Also, the format_hash_table resides in the current_unit, so 1360 child_dtio procedures would overwrite the parent table */ 1361 format_cache_ok = !is_internal_unit (dtp) 1362 && (dtp->u.p.current_unit->child_dtio == 0); 1363 1364 /* Lookup format string to see if it has already been parsed. */ 1365 if (format_cache_ok) 1366 { 1367 dtp->u.p.fmt = find_parsed_format (dtp); 1368 1369 if (dtp->u.p.fmt != NULL) 1370 { 1371 dtp->u.p.fmt->reversion_ok = 0; 1372 dtp->u.p.fmt->saved_token = FMT_NONE; 1373 dtp->u.p.fmt->saved_format = NULL; 1374 reset_fnode_counters (dtp); 1375 return; 1376 } 1377 } 1378 1379 /* Not found so proceed as follows. */ 1380 1381 char *fmt_string = fc_strdup_notrim (dtp->format, dtp->format_len); 1382 dtp->format = fmt_string; 1383 1384 dtp->u.p.fmt = fmt = xmalloc (sizeof (format_data)); 1385 fmt->format_string = dtp->format; 1386 fmt->format_string_len = dtp->format_len; 1387 1388 fmt->string = NULL; 1389 fmt->saved_token = FMT_NONE; 1390 fmt->error = NULL; 1391 fmt->value = 0; 1392 1393 /* Initialize variables used during traversal of the tree. */ 1394 1395 fmt->reversion_ok = 0; 1396 fmt->saved_format = NULL; 1397 1398 /* Initialize the fnode_array. */ 1399 1400 memset (&(fmt->array), 0, sizeof(fmt->array)); 1401 1402 /* Allocate the first format node as the root of the tree. */ 1403 1404 fmt->last = &fmt->array; 1405 fmt->last->next = NULL; 1406 fmt->avail = &fmt->array.array[0]; 1407 1408 memset (fmt->avail, 0, sizeof (*fmt->avail)); 1409 fmt->avail->format = FMT_LPAREN; 1410 fmt->avail->repeat = 1; 1411 fmt->avail++; 1412 1413 if (format_lex (fmt) == FMT_LPAREN) 1414 fmt->array.array[0].u.child = parse_format_list (dtp, &seen_data_desc); 1415 else 1416 fmt->error = "Missing initial left parenthesis in format"; 1417 1418 if (format_cache_ok) 1419 save_parsed_format (dtp); 1420 else 1421 dtp->u.p.format_not_saved = 1; 1422 1423 if (fmt->error) 1424 format_error (dtp, NULL, fmt->error); 1425} 1426 1427 1428/* next_format0()-- Get the next format node without worrying about 1429 reversion. Returns NULL when we hit the end of the list. 1430 Parenthesis nodes are incremented after the list has been 1431 exhausted, other nodes are incremented before they are returned. */ 1432 1433static const fnode * 1434next_format0 (fnode *f) 1435{ 1436 const fnode *r; 1437 1438 if (f == NULL) 1439 return NULL; 1440 1441 if (f->format != FMT_LPAREN) 1442 { 1443 f->count++; 1444 if (f->count <= f->repeat) 1445 return f; 1446 1447 f->count = 0; 1448 return NULL; 1449 } 1450 1451 /* Deal with a parenthesis node with unlimited format. */ 1452 1453 if (f->repeat == -2) /* -2 signifies unlimited. */ 1454 for (;;) 1455 { 1456 if (f->current == NULL) 1457 f->current = f->u.child; 1458 1459 for (; f->current != NULL; f->current = f->current->next) 1460 { 1461 r = next_format0 (f->current); 1462 if (r != NULL) 1463 return r; 1464 } 1465 } 1466 1467 /* Deal with a parenthesis node with specific repeat count. */ 1468 for (; f->count < f->repeat; f->count++) 1469 { 1470 if (f->current == NULL) 1471 f->current = f->u.child; 1472 1473 for (; f->current != NULL; f->current = f->current->next) 1474 { 1475 r = next_format0 (f->current); 1476 if (r != NULL) 1477 return r; 1478 } 1479 } 1480 1481 f->count = 0; 1482 return NULL; 1483} 1484 1485 1486/* next_format()-- Return the next format node. If the format list 1487 ends up being exhausted, we do reversion. Reversion is only 1488 allowed if we've seen a data descriptor since the 1489 initialization or the last reversion. We return NULL if there 1490 are no more data descriptors to return (which is an error 1491 condition). */ 1492 1493const fnode * 1494next_format (st_parameter_dt *dtp) 1495{ 1496 format_token t; 1497 const fnode *f; 1498 format_data *fmt = dtp->u.p.fmt; 1499 1500 if (fmt->saved_format != NULL) 1501 { /* Deal with a pushed-back format node */ 1502 f = fmt->saved_format; 1503 fmt->saved_format = NULL; 1504 goto done; 1505 } 1506 1507 f = next_format0 (&fmt->array.array[0]); 1508 if (f == NULL) 1509 { 1510 if (!fmt->reversion_ok) 1511 return NULL; 1512 1513 fmt->reversion_ok = 0; 1514 revert (dtp); 1515 1516 f = next_format0 (&fmt->array.array[0]); 1517 if (f == NULL) 1518 { 1519 format_error (dtp, NULL, reversion_error); 1520 return NULL; 1521 } 1522 1523 /* Push the first reverted token and return a colon node in case 1524 there are no more data items. */ 1525 1526 fmt->saved_format = f; 1527 return &colon_node; 1528 } 1529 1530 /* If this is a data edit descriptor, then reversion has become OK. */ 1531 done: 1532 t = f->format; 1533 1534 if (!fmt->reversion_ok && 1535 (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F || 1536 t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L || 1537 t == FMT_A || t == FMT_D || t == FMT_DT)) 1538 fmt->reversion_ok = 1; 1539 return f; 1540} 1541 1542 1543/* unget_format()-- Push the given format back so that it will be 1544 returned on the next call to next_format() without affecting 1545 counts. This is necessary when we've encountered a data 1546 descriptor, but don't know what the data item is yet. The format 1547 node is pushed back, and we return control to the main program, 1548 which calls the library back with the data item (or not). */ 1549 1550void 1551unget_format (st_parameter_dt *dtp, const fnode *f) 1552{ 1553 dtp->u.p.fmt->saved_format = f; 1554} 1555 1556