/* app.c revision 77298 */
1/* This is the Assembler Pre-Processor 2 Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 2000 3 Free Software Foundation, Inc. 4 5 This file is part of GAS, the GNU Assembler. 6 7 GAS is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2, or (at your option) 10 any later version. 11 12 GAS is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GAS; see the file COPYING. If not, write to the Free 19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 20 02111-1307, USA. */ 21 22/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ 23/* App, the assembler pre-processor. This pre-processor strips out excess 24 spaces, turns single-quoted characters into a decimal constant, and turns 25 # <number> <filename> <garbage> into a .line <number>\n.file <filename> 26 pair. This needs better error-handling. */ 27 28#include <stdio.h> 29#include "as.h" /* For BAD_CASE() only */ 30 31#if (__STDC__ != 1) 32#ifndef const 33#define const /* empty */ 34#endif 35#endif 36 37#ifdef TC_M68K 38/* Whether we are scrubbing in m68k MRI mode. This is different from 39 flag_m68k_mri, because the two flags will be affected by the .mri 40 pseudo-op at different times. */ 41static int scrub_m68k_mri; 42#else 43#define scrub_m68k_mri 0 44#endif 45 46/* The pseudo-op which switches in and out of MRI mode. See the 47 comment in do_scrub_chars. */ 48static const char mri_pseudo[] = ".mri 0"; 49 50#if defined TC_ARM && defined OBJ_ELF 51/* The pseudo-op for which we need to special-case `@' characters. 52 See the comment in do_scrub_chars. 
*/ 53static const char symver_pseudo[] = ".symver"; 54static const char * symver_state; 55#endif 56 57static char lex[256]; 58static const char symbol_chars[] = 59"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; 60 61#define LEX_IS_SYMBOL_COMPONENT 1 62#define LEX_IS_WHITESPACE 2 63#define LEX_IS_LINE_SEPARATOR 3 64#define LEX_IS_COMMENT_START 4 65#define LEX_IS_LINE_COMMENT_START 5 66#define LEX_IS_TWOCHAR_COMMENT_1ST 6 67#define LEX_IS_STRINGQUOTE 8 68#define LEX_IS_COLON 9 69#define LEX_IS_NEWLINE 10 70#define LEX_IS_ONECHAR_QUOTE 11 71#ifdef TC_V850 72#define LEX_IS_DOUBLEDASH_1ST 12 73#endif 74#ifdef TC_M32R 75#define DOUBLEBAR_PARALLEL 76#endif 77#ifdef DOUBLEBAR_PARALLEL 78#define LEX_IS_DOUBLEBAR_1ST 13 79#endif 80#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) 81#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE) 82#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR) 83#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START) 84#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START) 85#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE) 86 87static int process_escape PARAMS ((int)); 88 89/* FIXME-soon: The entire lexer/parser thingy should be 90 built statically at compile time rather than dynamically 91 each and every time the assembler is run. xoxorich. */ 92 93void 94do_scrub_begin (m68k_mri) 95 int m68k_mri ATTRIBUTE_UNUSED; 96{ 97 const char *p; 98 int c; 99 100 lex[' '] = LEX_IS_WHITESPACE; 101 lex['\t'] = LEX_IS_WHITESPACE; 102 lex['\r'] = LEX_IS_WHITESPACE; 103 lex['\n'] = LEX_IS_NEWLINE; 104 lex[':'] = LEX_IS_COLON; 105 106#ifdef TC_M68K 107 scrub_m68k_mri = m68k_mri; 108 109 if (! m68k_mri) 110#endif 111 { 112 lex['"'] = LEX_IS_STRINGQUOTE; 113 114#if ! defined (TC_HPPA) && ! 
defined (TC_I370) 115 /* I370 uses single-quotes to delimit integer, float constants */ 116 lex['\''] = LEX_IS_ONECHAR_QUOTE; 117#endif 118 119#ifdef SINGLE_QUOTE_STRINGS 120 lex['\''] = LEX_IS_STRINGQUOTE; 121#endif 122 } 123 124 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop 125 in state 5 of do_scrub_chars must be changed. */ 126 127 /* Note that these override the previous defaults, e.g. if ';' is a 128 comment char, then it isn't a line separator. */ 129 for (p = symbol_chars; *p; ++p) 130 { 131 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 132 } /* declare symbol characters */ 133 134 for (c = 128; c < 256; ++c) 135 lex[c] = LEX_IS_SYMBOL_COMPONENT; 136 137#ifdef tc_symbol_chars 138 /* This macro permits the processor to specify all characters which 139 may appears in an operand. This will prevent the scrubber from 140 discarding meaningful whitespace in certain cases. The i386 141 backend uses this to support prefixes, which can confuse the 142 scrubber as to whether it is parsing operands or opcodes. */ 143 for (p = tc_symbol_chars; *p; ++p) 144 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 145#endif 146 147 /* The m68k backend wants to be able to change comment_chars. */ 148#ifndef tc_comment_chars 149#define tc_comment_chars comment_chars 150#endif 151 for (p = tc_comment_chars; *p; p++) 152 { 153 lex[(unsigned char) *p] = LEX_IS_COMMENT_START; 154 } /* declare comment chars */ 155 156 for (p = line_comment_chars; *p; p++) 157 { 158 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; 159 } /* declare line comment chars */ 160 161 for (p = line_separator_chars; *p; p++) 162 { 163 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; 164 } /* declare line separators */ 165 166 /* Only allow slash-star comments if slash is not in use. 167 FIXME: This isn't right. We should always permit them. 
*/ 168 if (lex['/'] == 0) 169 { 170 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; 171 } 172 173#ifdef TC_M68K 174 if (m68k_mri) 175 { 176 lex['\''] = LEX_IS_STRINGQUOTE; 177 lex[';'] = LEX_IS_COMMENT_START; 178 lex['*'] = LEX_IS_LINE_COMMENT_START; 179 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but 180 then it can't be used in an expression. */ 181 lex['!'] = LEX_IS_LINE_COMMENT_START; 182 } 183#endif 184 185#ifdef TC_V850 186 lex['-'] = LEX_IS_DOUBLEDASH_1ST; 187#endif 188#ifdef DOUBLEBAR_PARALLEL 189 lex['|'] = LEX_IS_DOUBLEBAR_1ST; 190#endif 191#ifdef TC_D30V 192 /* must do this is we want VLIW instruction with "->" or "<-" */ 193 lex['-'] = LEX_IS_SYMBOL_COMPONENT; 194#endif 195} /* do_scrub_begin() */ 196 197/* Saved state of the scrubber */ 198static int state; 199static int old_state; 200static char *out_string; 201static char out_buf[20]; 202static int add_newlines; 203static char *saved_input; 204static int saved_input_len; 205static char input_buffer[32 * 1024]; 206static const char *mri_state; 207static char mri_last_ch; 208 209/* Data structure for saving the state of app across #include's. Note that 210 app is called asynchronously to the parsing of the .include's, so our 211 state at the time .include is interpreted is completely unrelated. 212 That's why we have to save it all. 
*/ 213 214struct app_save { 215 int state; 216 int old_state; 217 char * out_string; 218 char out_buf[sizeof (out_buf)]; 219 int add_newlines; 220 char * saved_input; 221 int saved_input_len; 222#ifdef TC_M68K 223 int scrub_m68k_mri; 224#endif 225 const char * mri_state; 226 char mri_last_ch; 227#if defined TC_ARM && defined OBJ_ELF 228 const char * symver_state; 229#endif 230}; 231 232char * 233app_push () 234{ 235 register struct app_save *saved; 236 237 saved = (struct app_save *) xmalloc (sizeof (*saved)); 238 saved->state = state; 239 saved->old_state = old_state; 240 saved->out_string = out_string; 241 memcpy (saved->out_buf, out_buf, sizeof (out_buf)); 242 saved->add_newlines = add_newlines; 243 if (saved_input == NULL) 244 saved->saved_input = NULL; 245 else 246 { 247 saved->saved_input = xmalloc (saved_input_len); 248 memcpy (saved->saved_input, saved_input, saved_input_len); 249 saved->saved_input_len = saved_input_len; 250 } 251#ifdef TC_M68K 252 saved->scrub_m68k_mri = scrub_m68k_mri; 253#endif 254 saved->mri_state = mri_state; 255 saved->mri_last_ch = mri_last_ch; 256#if defined TC_ARM && defined OBJ_ELF 257 saved->symver_state = symver_state; 258#endif 259 260 /* do_scrub_begin() is not useful, just wastes time. */ 261 262 state = 0; 263 saved_input = NULL; 264 265 return (char *) saved; 266} 267 268void 269app_pop (arg) 270 char *arg; 271{ 272 register struct app_save *saved = (struct app_save *) arg; 273 274 /* There is no do_scrub_end (). 
*/ 275 state = saved->state; 276 old_state = saved->old_state; 277 out_string = saved->out_string; 278 memcpy (out_buf, saved->out_buf, sizeof (out_buf)); 279 add_newlines = saved->add_newlines; 280 if (saved->saved_input == NULL) 281 saved_input = NULL; 282 else 283 { 284 assert (saved->saved_input_len <= (int) (sizeof input_buffer)); 285 memcpy (input_buffer, saved->saved_input, saved->saved_input_len); 286 saved_input = input_buffer; 287 saved_input_len = saved->saved_input_len; 288 free (saved->saved_input); 289 } 290#ifdef TC_M68K 291 scrub_m68k_mri = saved->scrub_m68k_mri; 292#endif 293 mri_state = saved->mri_state; 294 mri_last_ch = saved->mri_last_ch; 295#if defined TC_ARM && defined OBJ_ELF 296 symver_state = saved->symver_state; 297#endif 298 299 free (arg); 300} /* app_pop() */ 301 302/* @@ This assumes that \n &c are the same on host and target. This is not 303 necessarily true. */ 304static int 305process_escape (ch) 306 int ch; 307{ 308 switch (ch) 309 { 310 case 'b': 311 return '\b'; 312 case 'f': 313 return '\f'; 314 case 'n': 315 return '\n'; 316 case 'r': 317 return '\r'; 318 case 't': 319 return '\t'; 320 case '\'': 321 return '\''; 322 case '"': 323 return '\"'; 324 default: 325 return ch; 326 } 327} 328 329/* This function is called to process input characters. The GET 330 parameter is used to retrieve more input characters. GET should 331 set its parameter to point to a buffer, and return the length of 332 the buffer; it should return 0 at end of file. The scrubbed output 333 characters are put into the buffer starting at TOSTART; the TOSTART 334 buffer is TOLEN bytes in length. The function returns the number 335 of scrubbed characters put into TOSTART. This will be TOLEN unless 336 end of file was seen. This function is arranged as a state 337 machine, and saves its state so that it may return at any point. 338 This is the way the old code used to work. 
*/ 339 340int 341do_scrub_chars (get, tostart, tolen) 342 int (*get) PARAMS ((char *, int)); 343 char *tostart; 344 int tolen; 345{ 346 char *to = tostart; 347 char *toend = tostart + tolen; 348 char *from; 349 char *fromend; 350 int fromlen; 351 register int ch, ch2 = 0; 352 353 /*State 0: beginning of normal line 354 1: After first whitespace on line (flush more white) 355 2: After first non-white (opcode) on line (keep 1white) 356 3: after second white on line (into operands) (flush white) 357 4: after putting out a .line, put out digits 358 5: parsing a string, then go to old-state 359 6: putting out \ escape in a "d string. 360 7: After putting out a .appfile, put out string. 361 8: After putting out a .appfile string, flush until newline. 362 9: After seeing symbol char in state 3 (keep 1white after symchar) 363 10: After seeing whitespace in state 9 (keep white before symchar) 364 11: After seeing a symbol character in state 0 (eg a label definition) 365 -1: output string in out_string and go to the state in old_state 366 -2: flush text until a '*' '/' is seen, then go to state old_state 367#ifdef TC_V850 368 12: After seeing a dash, looking for a second dash as a start of comment. 369#endif 370#ifdef DOUBLEBAR_PARALLEL 371 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator. 372#endif 373 */ 374 375 /* I added states 9 and 10 because the MIPS ECOFF assembler uses 376 constructs like ``.loc 1 20''. This was turning into ``.loc 377 120''. States 9 and 10 ensure that a space is never dropped in 378 between characters which could appear in a identifier. Ian 379 Taylor, ian@cygnus.com. 380 381 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works 382 correctly on the PA (and any other target where colons are optional). 383 Jeff Law, law@cs.utah.edu. 
384 385 I added state 13 so that something like "cmp r1, r2 || trap #1" does not 386 get squashed into "cmp r1,r2||trap#1", with the all important space 387 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */ 388 389 /* This macro gets the next input character. */ 390 391#define GET() \ 392 (from < fromend \ 393 ? * (unsigned char *) (from++) \ 394 : (saved_input = NULL, \ 395 fromlen = (*get) (input_buffer, sizeof input_buffer), \ 396 from = input_buffer, \ 397 fromend = from + fromlen, \ 398 (fromlen == 0 \ 399 ? EOF \ 400 : * (unsigned char *) (from++)))) 401 402 /* This macro pushes a character back on the input stream. */ 403 404#define UNGET(uch) (*--from = (uch)) 405 406 /* This macro puts a character into the output buffer. If this 407 character fills the output buffer, this macro jumps to the label 408 TOFULL. We use this rather ugly approach because we need to 409 handle two different termination conditions: EOF on the input 410 stream, and a full output buffer. It would be simpler if we 411 always read in the entire input stream before processing it, but 412 I don't want to make such a significant change to the assembler's 413 memory usage. */ 414 415#define PUT(pch) \ 416 do \ 417 { \ 418 *to++ = (pch); \ 419 if (to >= toend) \ 420 goto tofull; \ 421 } \ 422 while (0) 423 424 if (saved_input != NULL) 425 { 426 from = saved_input; 427 fromend = from + saved_input_len; 428 } 429 else 430 { 431 fromlen = (*get) (input_buffer, sizeof input_buffer); 432 if (fromlen == 0) 433 return 0; 434 from = input_buffer; 435 fromend = from + fromlen; 436 } 437 438 while (1) 439 { 440 /* The cases in this switch end with continue, in order to 441 branch back to the top of this while loop and generate the 442 next output character in the appropriate state. 
*/ 443 switch (state) 444 { 445 case -1: 446 ch = *out_string++; 447 if (*out_string == '\0') 448 { 449 state = old_state; 450 old_state = 3; 451 } 452 PUT (ch); 453 continue; 454 455 case -2: 456 for (;;) 457 { 458 do 459 { 460 ch = GET (); 461 462 if (ch == EOF) 463 { 464 as_warn (_("end of file in comment")); 465 goto fromeof; 466 } 467 468 if (ch == '\n') 469 PUT ('\n'); 470 } 471 while (ch != '*'); 472 473 while ((ch = GET ()) == '*') 474 ; 475 476 if (ch == EOF) 477 { 478 as_warn (_("end of file in comment")); 479 goto fromeof; 480 } 481 482 if (ch == '/') 483 break; 484 485 UNGET (ch); 486 } 487 488 state = old_state; 489 UNGET (' '); 490 continue; 491 492 case 4: 493 ch = GET (); 494 if (ch == EOF) 495 goto fromeof; 496 else if (ch >= '0' && ch <= '9') 497 PUT (ch); 498 else 499 { 500 while (ch != EOF && IS_WHITESPACE (ch)) 501 ch = GET (); 502 if (ch == '"') 503 { 504 UNGET (ch); 505 if (scrub_m68k_mri) 506 out_string = "\n\tappfile "; 507 else 508 out_string = "\n\t.appfile "; 509 old_state = 7; 510 state = -1; 511 PUT (*out_string++); 512 } 513 else 514 { 515 while (ch != EOF && ch != '\n') 516 ch = GET (); 517 state = 0; 518 PUT (ch); 519 } 520 } 521 continue; 522 523 case 5: 524 /* We are going to copy everything up to a quote character, 525 with special handling for a backslash. We try to 526 optimize the copying in the simple case without using the 527 GET and PUT macros. */ 528 { 529 char *s; 530 int len; 531 532 for (s = from; s < fromend; s++) 533 { 534 ch = *s; 535 /* This condition must be changed if the type of any 536 other character can be LEX_IS_STRINGQUOTE. 
*/ 537 if (ch == '\\' 538 || ch == '"' 539 || ch == '\'' 540 || ch == '\n') 541 break; 542 } 543 len = s - from; 544 if (len > toend - to) 545 len = toend - to; 546 if (len > 0) 547 { 548 memcpy (to, from, len); 549 to += len; 550 from += len; 551 } 552 } 553 554 ch = GET (); 555 if (ch == EOF) 556 { 557 as_warn (_("end of file in string: inserted '\"'")); 558 state = old_state; 559 UNGET ('\n'); 560 PUT ('"'); 561 } 562 else if (lex[ch] == LEX_IS_STRINGQUOTE) 563 { 564 state = old_state; 565 PUT (ch); 566 } 567#ifndef NO_STRING_ESCAPES 568 else if (ch == '\\') 569 { 570 state = 6; 571 PUT (ch); 572 } 573#endif 574 else if (scrub_m68k_mri && ch == '\n') 575 { 576 /* Just quietly terminate the string. This permits lines like 577 bne label loop if we haven't reach end yet 578 */ 579 state = old_state; 580 UNGET (ch); 581 PUT ('\''); 582 } 583 else 584 { 585 PUT (ch); 586 } 587 continue; 588 589 case 6: 590 state = 5; 591 ch = GET (); 592 switch (ch) 593 { 594 /* Handle strings broken across lines, by turning '\n' into 595 '\\' and 'n'. 
*/ 596 case '\n': 597 UNGET ('n'); 598 add_newlines++; 599 PUT ('\\'); 600 continue; 601 602 case '"': 603 case '\\': 604 case 'b': 605 case 'f': 606 case 'n': 607 case 'r': 608 case 't': 609 case 'v': 610 case 'x': 611 case 'X': 612 case '0': 613 case '1': 614 case '2': 615 case '3': 616 case '4': 617 case '5': 618 case '6': 619 case '7': 620 break; 621#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) 622 default: 623 as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch); 624 break; 625#else /* ONLY_STANDARD_ESCAPES */ 626 default: 627 /* Accept \x as x for any x */ 628 break; 629#endif /* ONLY_STANDARD_ESCAPES */ 630 631 case EOF: 632 as_warn (_("End of file in string: '\"' inserted")); 633 PUT ('"'); 634 continue; 635 } 636 PUT (ch); 637 continue; 638 639 case 7: 640 ch = GET (); 641 state = 5; 642 old_state = 8; 643 if (ch == EOF) 644 goto fromeof; 645 PUT (ch); 646 continue; 647 648 case 8: 649 do 650 ch = GET (); 651 while (ch != '\n' && ch != EOF); 652 if (ch == EOF) 653 goto fromeof; 654 state = 0; 655 PUT (ch); 656 continue; 657 } 658 659 /* OK, we are somewhere in states 0 through 4 or 9 through 11 */ 660 661 /* flushchar: */ 662 ch = GET (); 663 664 recycle: 665 666#if defined TC_ARM && defined OBJ_ELF 667 /* We need to watch out for .symver directives. See the comment later 668 in this function. */ 669 if (symver_state == NULL) 670 { 671 if ((state == 0 || state == 1) && ch == symver_pseudo[0]) 672 symver_state = symver_pseudo + 1; 673 } 674 else 675 { 676 /* We advance to the next state if we find the right 677 character. */ 678 if (ch != '\0' && (*symver_state == ch)) 679 ++symver_state; 680 else if (*symver_state != '\0') 681 /* We did not get the expected character, or we didn't 682 get a valid terminating character after seeing the 683 entire pseudo-op, so we must go back to the beginning. */ 684 symver_state = NULL; 685 else 686 { 687 /* We've read the entire pseudo-op. 
If this is the end 688 of the line, go back to the beginning. */ 689 if (IS_NEWLINE (ch)) 690 symver_state = NULL; 691 } 692 } 693#endif /* TC_ARM && OBJ_ELF */ 694 695#ifdef TC_M68K 696 /* We want to have pseudo-ops which control whether we are in 697 MRI mode or not. Unfortunately, since m68k MRI mode affects 698 the scrubber, that means that we need a special purpose 699 recognizer here. */ 700 if (mri_state == NULL) 701 { 702 if ((state == 0 || state == 1) 703 && ch == mri_pseudo[0]) 704 mri_state = mri_pseudo + 1; 705 } 706 else 707 { 708 /* We advance to the next state if we find the right 709 character, or if we need a space character and we get any 710 whitespace character, or if we need a '0' and we get a 711 '1' (this is so that we only need one state to handle 712 ``.mri 0'' and ``.mri 1''). */ 713 if (ch != '\0' 714 && (*mri_state == ch 715 || (*mri_state == ' ' 716 && lex[ch] == LEX_IS_WHITESPACE) 717 || (*mri_state == '0' 718 && ch == '1'))) 719 { 720 mri_last_ch = ch; 721 ++mri_state; 722 } 723 else if (*mri_state != '\0' 724 || (lex[ch] != LEX_IS_WHITESPACE 725 && lex[ch] != LEX_IS_NEWLINE)) 726 { 727 /* We did not get the expected character, or we didn't 728 get a valid terminating character after seeing the 729 entire pseudo-op, so we must go back to the 730 beginning. */ 731 mri_state = NULL; 732 } 733 else 734 { 735 /* We've read the entire pseudo-op. mips_last_ch is 736 either '0' or '1' indicating whether to enter or 737 leave MRI mode. */ 738 do_scrub_begin (mri_last_ch == '1'); 739 mri_state = NULL; 740 741 /* We continue handling the character as usual. The 742 main gas reader must also handle the .mri pseudo-op 743 to control expression parsing and the like. 
*/ 744 } 745 } 746#endif 747 748 if (ch == EOF) 749 { 750 if (state != 0) 751 { 752 as_warn (_("end of file not at end of a line; newline inserted")); 753 state = 0; 754 PUT ('\n'); 755 } 756 goto fromeof; 757 } 758 759 switch (lex[ch]) 760 { 761 case LEX_IS_WHITESPACE: 762 do 763 { 764 ch = GET (); 765 } 766 while (ch != EOF && IS_WHITESPACE (ch)); 767 if (ch == EOF) 768 goto fromeof; 769 770 if (state == 0) 771 { 772 /* Preserve a single whitespace character at the 773 beginning of a line. */ 774 state = 1; 775 UNGET (ch); 776 PUT (' '); 777 break; 778 } 779 780#ifdef KEEP_WHITE_AROUND_COLON 781 if (lex[ch] == LEX_IS_COLON) 782 { 783 /* Only keep this white if there's no white *after* the 784 colon. */ 785 ch2 = GET (); 786 UNGET (ch2); 787 if (!IS_WHITESPACE (ch2)) 788 { 789 state = 9; 790 UNGET (ch); 791 PUT (' '); 792 break; 793 } 794 } 795#endif 796 if (IS_COMMENT (ch) 797 || ch == '/' 798 || IS_LINE_SEPARATOR (ch)) 799 { 800 if (scrub_m68k_mri) 801 { 802 /* In MRI mode, we keep these spaces. */ 803 UNGET (ch); 804 PUT (' '); 805 break; 806 } 807 goto recycle; 808 } 809 810 /* If we're in state 2 or 11, we've seen a non-white 811 character followed by whitespace. If the next character 812 is ':', this is whitespace after a label name which we 813 normally must ignore. In MRI mode, though, spaces are 814 not permitted between the label and the colon. */ 815 if ((state == 2 || state == 11) 816 && lex[ch] == LEX_IS_COLON 817 && ! scrub_m68k_mri) 818 { 819 state = 1; 820 PUT (ch); 821 break; 822 } 823 824 switch (state) 825 { 826 case 0: 827 state++; 828 goto recycle; /* Punted leading sp */ 829 case 1: 830 /* We can arrive here if we leave a leading whitespace 831 character at the beginning of a line. */ 832 goto recycle; 833 case 2: 834 state = 3; 835 if (to + 1 < toend) 836 { 837 /* Optimize common case by skipping UNGET/GET. 
*/ 838 PUT (' '); /* Sp after opco */ 839 goto recycle; 840 } 841 UNGET (ch); 842 PUT (' '); 843 break; 844 case 3: 845 if (scrub_m68k_mri) 846 { 847 /* In MRI mode, we keep these spaces. */ 848 UNGET (ch); 849 PUT (' '); 850 break; 851 } 852 goto recycle; /* Sp in operands */ 853 case 9: 854 case 10: 855 if (scrub_m68k_mri) 856 { 857 /* In MRI mode, we keep these spaces. */ 858 state = 3; 859 UNGET (ch); 860 PUT (' '); 861 break; 862 } 863 state = 10; /* Sp after symbol char */ 864 goto recycle; 865 case 11: 866 if (LABELS_WITHOUT_COLONS || flag_m68k_mri) 867 state = 1; 868 else 869 { 870 /* We know that ch is not ':', since we tested that 871 case above. Therefore this is not a label, so it 872 must be the opcode, and we've just seen the 873 whitespace after it. */ 874 state = 3; 875 } 876 UNGET (ch); 877 PUT (' '); /* Sp after label definition. */ 878 break; 879 default: 880 BAD_CASE (state); 881 } 882 break; 883 884 case LEX_IS_TWOCHAR_COMMENT_1ST: 885 ch2 = GET (); 886 if (ch2 == '*') 887 { 888 for (;;) 889 { 890 do 891 { 892 ch2 = GET (); 893 if (ch2 != EOF && IS_NEWLINE (ch2)) 894 add_newlines++; 895 } 896 while (ch2 != EOF && ch2 != '*'); 897 898 while (ch2 == '*') 899 ch2 = GET (); 900 901 if (ch2 == EOF || ch2 == '/') 902 break; 903 904 /* This UNGET will ensure that we count newlines 905 correctly. 
*/ 906 UNGET (ch2); 907 } 908 909 if (ch2 == EOF) 910 as_warn (_("end of file in multiline comment")); 911 912 ch = ' '; 913 goto recycle; 914 } 915#ifdef DOUBLESLASH_LINE_COMMENTS 916 else if (ch2 == '/') 917 { 918 do 919 { 920 ch = GET (); 921 } 922 while (ch != EOF && !IS_NEWLINE (ch)); 923 if (ch == EOF) 924 as_warn ("end of file in comment; newline inserted"); 925 state = 0; 926 PUT ('\n'); 927 break; 928 } 929#endif 930 else 931 { 932 if (ch2 != EOF) 933 UNGET (ch2); 934 if (state == 9 || state == 10) 935 state = 3; 936 PUT (ch); 937 } 938 break; 939 940 case LEX_IS_STRINGQUOTE: 941 if (state == 10) 942 { 943 /* Preserve the whitespace in foo "bar" */ 944 UNGET (ch); 945 state = 3; 946 PUT (' '); 947 948 /* PUT didn't jump out. We could just break, but we 949 know what will happen, so optimize a bit. */ 950 ch = GET (); 951 old_state = 3; 952 } 953 else if (state == 9) 954 old_state = 3; 955 else 956 old_state = state; 957 state = 5; 958 PUT (ch); 959 break; 960 961#ifndef IEEE_STYLE 962 case LEX_IS_ONECHAR_QUOTE: 963 if (state == 10) 964 { 965 /* Preserve the whitespace in foo 'b' */ 966 UNGET (ch); 967 state = 3; 968 PUT (' '); 969 break; 970 } 971 ch = GET (); 972 if (ch == EOF) 973 { 974 as_warn (_("end of file after a one-character quote; \\0 inserted")); 975 ch = 0; 976 } 977 if (ch == '\\') 978 { 979 ch = GET (); 980 if (ch == EOF) 981 { 982 as_warn (_("end of file in escape character")); 983 ch = '\\'; 984 } 985 else 986 ch = process_escape (ch); 987 } 988 sprintf (out_buf, "%d", (int) (unsigned char) ch); 989 990 /* None of these 'x constants for us. We want 'x'. 
*/ 991 if ((ch = GET ()) != '\'') 992 { 993#ifdef REQUIRE_CHAR_CLOSE_QUOTE 994 as_warn (_("Missing close quote: (assumed)")); 995#else 996 if (ch != EOF) 997 UNGET (ch); 998#endif 999 } 1000 if (strlen (out_buf) == 1) 1001 { 1002 PUT (out_buf[0]); 1003 break; 1004 } 1005 if (state == 9) 1006 old_state = 3; 1007 else 1008 old_state = state; 1009 state = -1; 1010 out_string = out_buf; 1011 PUT (*out_string++); 1012 break; 1013#endif 1014 1015 case LEX_IS_COLON: 1016#ifdef KEEP_WHITE_AROUND_COLON 1017 state = 9; 1018#else 1019 if (state == 9 || state == 10) 1020 state = 3; 1021 else if (state != 3) 1022 state = 1; 1023#endif 1024 PUT (ch); 1025 break; 1026 1027 case LEX_IS_NEWLINE: 1028 /* Roll out a bunch of newlines from inside comments, etc. */ 1029 if (add_newlines) 1030 { 1031 --add_newlines; 1032 UNGET (ch); 1033 } 1034 /* Fall through. */ 1035 1036 case LEX_IS_LINE_SEPARATOR: 1037 state = 0; 1038 PUT (ch); 1039 break; 1040 1041#ifdef TC_V850 1042 case LEX_IS_DOUBLEDASH_1ST: 1043 ch2 = GET (); 1044 if (ch2 != '-') 1045 { 1046 UNGET (ch2); 1047 goto de_fault; 1048 } 1049 /* Read and skip to end of line. */ 1050 do 1051 { 1052 ch = GET (); 1053 } 1054 while (ch != EOF && ch != '\n'); 1055 if (ch == EOF) 1056 { 1057 as_warn (_("end of file in comment; newline inserted")); 1058 } 1059 state = 0; 1060 PUT ('\n'); 1061 break; 1062#endif 1063#ifdef DOUBLEBAR_PARALLEL 1064 case LEX_IS_DOUBLEBAR_1ST: 1065 ch2 = GET (); 1066 if (ch2 != '|') 1067 { 1068 UNGET (ch2); 1069 goto de_fault; 1070 } 1071 /* Reset back to state 1 and pretend that we are parsing a line from 1072 just after the first white space. */ 1073 state = 1; 1074 PUT ('|'); 1075 PUT ('|'); 1076 break; 1077#endif 1078 case LEX_IS_LINE_COMMENT_START: 1079 /* FIXME-someday: The two character comment stuff was badly 1080 thought out. On i386, we want '/' as line comment start 1081 AND we want C style comments. hence this hack. The 1082 whole lexical process should be reworked. xoxorich. 
*/ 1083 if (ch == '/') 1084 { 1085 ch2 = GET (); 1086 if (ch2 == '*') 1087 { 1088 old_state = 3; 1089 state = -2; 1090 break; 1091 } 1092 else 1093 { 1094 UNGET (ch2); 1095 } 1096 } /* bad hack */ 1097 1098 if (state == 0 || state == 1) /* Only comment at start of line. */ 1099 { 1100 int startch; 1101 1102 startch = ch; 1103 1104 do 1105 { 1106 ch = GET (); 1107 } 1108 while (ch != EOF && IS_WHITESPACE (ch)); 1109 if (ch == EOF) 1110 { 1111 as_warn (_("end of file in comment; newline inserted")); 1112 PUT ('\n'); 1113 break; 1114 } 1115 if (ch < '0' || ch > '9' || state != 0 || startch != '#') 1116 { 1117 /* Not a cpp line. */ 1118 while (ch != EOF && !IS_NEWLINE (ch)) 1119 ch = GET (); 1120 if (ch == EOF) 1121 as_warn (_("EOF in Comment: Newline inserted")); 1122 state = 0; 1123 PUT ('\n'); 1124 break; 1125 } 1126 /* Looks like `# 123 "filename"' from cpp. */ 1127 UNGET (ch); 1128 old_state = 4; 1129 state = -1; 1130 if (scrub_m68k_mri) 1131 out_string = "\tappline "; 1132 else 1133 out_string = "\t.appline "; 1134 PUT (*out_string++); 1135 break; 1136 } 1137 1138#ifdef TC_D10V 1139 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true. 1140 Trap is the only short insn that has a first operand that is 1141 neither register nor label. 1142 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 . 1143 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is 1144 already LEX_IS_LINE_COMMENT_START. However, it is the 1145 only character in line_comment_chars for d10v, hence we 1146 can recognize it as such. */ 1147 /* An alternative approach would be to reset the state to 1 when 1148 we see '||', '<'- or '->', but that seems to be overkill. */ 1149 if (state == 10) 1150 PUT (' '); 1151#endif 1152 /* We have a line comment character which is not at the 1153 start of a line. If this is also a normal comment 1154 character, fall through. Otherwise treat it as a default 1155 character. */ 1156 if (strchr (tc_comment_chars, ch) == NULL 1157 && (! 
scrub_m68k_mri 1158 || (ch != '!' && ch != '*'))) 1159 goto de_fault; 1160 if (scrub_m68k_mri 1161 && (ch == '!' || ch == '*' || ch == '#') 1162 && state != 1 1163 && state != 10) 1164 goto de_fault; 1165 /* Fall through. */ 1166 case LEX_IS_COMMENT_START: 1167#if defined TC_ARM && defined OBJ_ELF 1168 /* On the ARM, `@' is the comment character. 1169 Unfortunately this is also a special character in ELF .symver 1170 directives (and .type, though we deal with those another way). 1171 So we check if this line is such a directive, and treat 1172 the character as default if so. This is a hack. */ 1173 if ((symver_state != NULL) && (*symver_state == 0)) 1174 goto de_fault; 1175#endif 1176#ifdef WARN_COMMENTS 1177 if (!found_comment) 1178 as_where (&found_comment_file, &found_comment); 1179#endif 1180 do 1181 { 1182 ch = GET (); 1183 } 1184 while (ch != EOF && !IS_NEWLINE (ch)); 1185 if (ch == EOF) 1186 as_warn (_("end of file in comment; newline inserted")); 1187 state = 0; 1188 PUT ('\n'); 1189 break; 1190 1191 case LEX_IS_SYMBOL_COMPONENT: 1192 if (state == 10) 1193 { 1194 /* This is a symbol character following another symbol 1195 character, with whitespace in between. We skipped 1196 the whitespace earlier, so output it now. */ 1197 UNGET (ch); 1198 state = 3; 1199 PUT (' '); 1200 break; 1201 } 1202 1203 if (state == 3) 1204 state = 9; 1205 1206 /* This is a common case. Quickly copy CH and all the 1207 following symbol component or normal characters. */ 1208 if (to + 1 < toend 1209 && mri_state == NULL 1210#if defined TC_ARM && defined OBJ_ELF 1211 && symver_state == NULL 1212#endif 1213 ) 1214 { 1215 char *s; 1216 int len; 1217 1218 for (s = from; s < fromend; s++) 1219 { 1220 int type; 1221 1222 ch2 = *(unsigned char *) s; 1223 type = lex[ch2]; 1224 if (type != 0 1225 && type != LEX_IS_SYMBOL_COMPONENT) 1226 break; 1227 } 1228 if (s > from) 1229 { 1230 /* Handle the last character normally, for 1231 simplicity. 
*/ 1232 --s; 1233 } 1234 len = s - from; 1235 if (len > (toend - to) - 1) 1236 len = (toend - to) - 1; 1237 if (len > 0) 1238 { 1239 PUT (ch); 1240 if (len > 8) 1241 { 1242 memcpy (to, from, len); 1243 to += len; 1244 from += len; 1245 } 1246 else 1247 { 1248 switch (len) 1249 { 1250 case 8: *to++ = *from++; 1251 case 7: *to++ = *from++; 1252 case 6: *to++ = *from++; 1253 case 5: *to++ = *from++; 1254 case 4: *to++ = *from++; 1255 case 3: *to++ = *from++; 1256 case 2: *to++ = *from++; 1257 case 1: *to++ = *from++; 1258 } 1259 } 1260 ch = GET (); 1261 } 1262 } 1263 1264 /* Fall through. */ 1265 default: 1266 de_fault: 1267 /* Some relatively `normal' character. */ 1268 if (state == 0) 1269 { 1270 state = 11; /* Now seeing label definition */ 1271 } 1272 else if (state == 1) 1273 { 1274 state = 2; /* Ditto */ 1275 } 1276 else if (state == 9) 1277 { 1278 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) 1279 state = 3; 1280 } 1281 else if (state == 10) 1282 { 1283 if (ch == '\\') 1284 { 1285 /* Special handling for backslash: a backslash may 1286 be the beginning of a formal parameter (of a 1287 macro) following another symbol character, with 1288 whitespace in between. If that is the case, we 1289 output a space before the parameter. Strictly 1290 speaking, correct handling depends upon what the 1291 macro parameter expands into; if the parameter 1292 expands into something which does not start with 1293 an operand character, then we don't want to keep 1294 the space. We don't have enough information to 1295 make the right choice, so here we are making the 1296 choice which is more likely to be correct. */ 1297 PUT (' '); 1298 } 1299 1300 state = 3; 1301 } 1302 PUT (ch); 1303 break; 1304 } 1305 } 1306 1307 /*NOTREACHED*/ 1308 1309 fromeof: 1310 /* We have reached the end of the input. */ 1311 return to - tostart; 1312 1313 tofull: 1314 /* The output buffer is full. Save any input we have not yet 1315 processed. 
*/ 1316 if (fromend > from) 1317 { 1318 saved_input = from; 1319 saved_input_len = fromend - from; 1320 } 1321 else 1322 saved_input = NULL; 1323 1324 return to - tostart; 1325} 1326 1327/* end of app.c */ 1328