app.c revision 89857
133965Sjdp/* This is the Assembler Pre-Processor 278828Sobrien Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 378828Sobrien 1999, 2000 433965Sjdp Free Software Foundation, Inc. 533965Sjdp 633965Sjdp This file is part of GAS, the GNU Assembler. 733965Sjdp 833965Sjdp GAS is free software; you can redistribute it and/or modify 933965Sjdp it under the terms of the GNU General Public License as published by 1033965Sjdp the Free Software Foundation; either version 2, or (at your option) 1133965Sjdp any later version. 1233965Sjdp 1333965Sjdp GAS is distributed in the hope that it will be useful, 1433965Sjdp but WITHOUT ANY WARRANTY; without even the implied warranty of 1533965Sjdp MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1633965Sjdp GNU General Public License for more details. 1733965Sjdp 1833965Sjdp You should have received a copy of the GNU General Public License 1933965Sjdp along with GAS; see the file COPYING. If not, write to the Free 2033965Sjdp Software Foundation, 59 Temple Place - Suite 330, Boston, MA 2133965Sjdp 02111-1307, USA. */ 2233965Sjdp 2333965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ 2433965Sjdp/* App, the assembler pre-processor. This pre-processor strips out excess 2533965Sjdp spaces, turns single-quoted characters into a decimal constant, and turns 2633965Sjdp # <number> <filename> <garbage> into a .line <number>\n.file <filename> 2733965Sjdp pair. This needs better error-handling. */ 2833965Sjdp 2933965Sjdp#include <stdio.h> 3033965Sjdp#include "as.h" /* For BAD_CASE() only */ 3133965Sjdp 3233965Sjdp#if (__STDC__ != 1) 3333965Sjdp#ifndef const 3433965Sjdp#define const /* empty */ 3533965Sjdp#endif 3633965Sjdp#endif 3733965Sjdp 3860484Sobrien#ifdef TC_M68K 3933965Sjdp/* Whether we are scrubbing in m68k MRI mode. This is different from 4033965Sjdp flag_m68k_mri, because the two flags will be affected by the .mri 4133965Sjdp pseudo-op at different times. */ 4233965Sjdpstatic int scrub_m68k_mri; 4360484Sobrien#else 4460484Sobrien#define scrub_m68k_mri 0 4560484Sobrien#endif 4633965Sjdp 4733965Sjdp/* The pseudo-op which switches in and out of MRI mode. See the 4833965Sjdp comment in do_scrub_chars. */ 4933965Sjdpstatic const char mri_pseudo[] = ".mri 0"; 5033965Sjdp 5160484Sobrien#if defined TC_ARM && defined OBJ_ELF 5277298Sobrien/* The pseudo-op for which we need to special-case `@' characters. 5360484Sobrien See the comment in do_scrub_chars. */ 5460484Sobrienstatic const char symver_pseudo[] = ".symver"; 5560484Sobrienstatic const char * symver_state; 5660484Sobrien#endif 5760484Sobrien 5833965Sjdpstatic char lex[256]; 5933965Sjdpstatic const char symbol_chars[] = 6033965Sjdp"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; 6133965Sjdp 6233965Sjdp#define LEX_IS_SYMBOL_COMPONENT 1 6333965Sjdp#define LEX_IS_WHITESPACE 2 6433965Sjdp#define LEX_IS_LINE_SEPARATOR 3 6533965Sjdp#define LEX_IS_COMMENT_START 4 6633965Sjdp#define LEX_IS_LINE_COMMENT_START 5 6733965Sjdp#define LEX_IS_TWOCHAR_COMMENT_1ST 6 6833965Sjdp#define LEX_IS_STRINGQUOTE 8 6933965Sjdp#define LEX_IS_COLON 9 7033965Sjdp#define LEX_IS_NEWLINE 10 7133965Sjdp#define LEX_IS_ONECHAR_QUOTE 11 7238889Sjdp#ifdef TC_V850 7338889Sjdp#define LEX_IS_DOUBLEDASH_1ST 12 7438889Sjdp#endif 7538889Sjdp#ifdef TC_M32R 7660484Sobrien#define DOUBLEBAR_PARALLEL 7760484Sobrien#endif 7860484Sobrien#ifdef DOUBLEBAR_PARALLEL 7938889Sjdp#define LEX_IS_DOUBLEBAR_1ST 13 8038889Sjdp#endif 8189857Sobrien#define LEX_IS_PARALLEL_SEPARATOR 14 8233965Sjdp#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) 8333965Sjdp#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE) 8433965Sjdp#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR) 8589857Sobrien#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR) 8633965Sjdp#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START) 8733965Sjdp#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START) 8833965Sjdp#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE) 8933965Sjdp 9033965Sjdpstatic int process_escape PARAMS ((int)); 9133965Sjdp 9233965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be 9333965Sjdp built statically at compile time rather than dynamically 9477298Sobrien each and every time the assembler is run. xoxorich. */ 9533965Sjdp 9677298Sobrienvoid 9733965Sjdpdo_scrub_begin (m68k_mri) 9860484Sobrien int m68k_mri ATTRIBUTE_UNUSED; 9933965Sjdp{ 10033965Sjdp const char *p; 10160484Sobrien int c; 10233965Sjdp 10333965Sjdp lex[' '] = LEX_IS_WHITESPACE; 10433965Sjdp lex['\t'] = LEX_IS_WHITESPACE; 10538889Sjdp lex['\r'] = LEX_IS_WHITESPACE; 10633965Sjdp lex['\n'] = LEX_IS_NEWLINE; 10733965Sjdp lex[':'] = LEX_IS_COLON; 10833965Sjdp 10960484Sobrien#ifdef TC_M68K 11060484Sobrien scrub_m68k_mri = m68k_mri; 11160484Sobrien 11233965Sjdp if (! m68k_mri) 11360484Sobrien#endif 11433965Sjdp { 11533965Sjdp lex['"'] = LEX_IS_STRINGQUOTE; 11633965Sjdp 11760484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370) 11860484Sobrien /* I370 uses single-quotes to delimit integer, float constants */ 11933965Sjdp lex['\''] = LEX_IS_ONECHAR_QUOTE; 12033965Sjdp#endif 12133965Sjdp 12233965Sjdp#ifdef SINGLE_QUOTE_STRINGS 12333965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 12433965Sjdp#endif 12533965Sjdp } 12633965Sjdp 12733965Sjdp /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop 12833965Sjdp in state 5 of do_scrub_chars must be changed. */ 12933965Sjdp 13033965Sjdp /* Note that these override the previous defaults, e.g. if ';' is a 13133965Sjdp comment char, then it isn't a line separator. */ 13233965Sjdp for (p = symbol_chars; *p; ++p) 13333965Sjdp { 13433965Sjdp lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 13533965Sjdp } /* declare symbol characters */ 13633965Sjdp 13760484Sobrien for (c = 128; c < 256; ++c) 13860484Sobrien lex[c] = LEX_IS_SYMBOL_COMPONENT; 13960484Sobrien 14060484Sobrien#ifdef tc_symbol_chars 14160484Sobrien /* This macro permits the processor to specify all characters which 14260484Sobrien may appears in an operand. This will prevent the scrubber from 14360484Sobrien discarding meaningful whitespace in certain cases. The i386 14460484Sobrien backend uses this to support prefixes, which can confuse the 14560484Sobrien scrubber as to whether it is parsing operands or opcodes. */ 14660484Sobrien for (p = tc_symbol_chars; *p; ++p) 14760484Sobrien lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 14860484Sobrien#endif 14960484Sobrien 15033965Sjdp /* The m68k backend wants to be able to change comment_chars. */ 15133965Sjdp#ifndef tc_comment_chars 15233965Sjdp#define tc_comment_chars comment_chars 15333965Sjdp#endif 15433965Sjdp for (p = tc_comment_chars; *p; p++) 15533965Sjdp { 15633965Sjdp lex[(unsigned char) *p] = LEX_IS_COMMENT_START; 15733965Sjdp } /* declare comment chars */ 15833965Sjdp 15933965Sjdp for (p = line_comment_chars; *p; p++) 16033965Sjdp { 16133965Sjdp lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; 16233965Sjdp } /* declare line comment chars */ 16333965Sjdp 16433965Sjdp for (p = line_separator_chars; *p; p++) 16533965Sjdp { 16633965Sjdp lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; 16733965Sjdp } /* declare line separators */ 16833965Sjdp 16989857Sobrien#ifdef tc_parallel_separator_chars 17089857Sobrien /* This macro permits the processor to specify all characters which 17189857Sobrien separate parallel insns on the same line. */ 17289857Sobrien for (p = tc_parallel_separator_chars; *p; p++) 17389857Sobrien { 17489857Sobrien lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR; 17589857Sobrien } /* declare parallel separators */ 17689857Sobrien#endif 17789857Sobrien 17833965Sjdp /* Only allow slash-star comments if slash is not in use. 17933965Sjdp FIXME: This isn't right. We should always permit them. */ 18033965Sjdp if (lex['/'] == 0) 18133965Sjdp { 18233965Sjdp lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; 18333965Sjdp } 18433965Sjdp 18560484Sobrien#ifdef TC_M68K 18633965Sjdp if (m68k_mri) 18733965Sjdp { 18833965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 18933965Sjdp lex[';'] = LEX_IS_COMMENT_START; 19033965Sjdp lex['*'] = LEX_IS_LINE_COMMENT_START; 19133965Sjdp /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but 19233965Sjdp then it can't be used in an expression. */ 19333965Sjdp lex['!'] = LEX_IS_LINE_COMMENT_START; 19433965Sjdp } 19560484Sobrien#endif 19638889Sjdp 19738889Sjdp#ifdef TC_V850 19838889Sjdp lex['-'] = LEX_IS_DOUBLEDASH_1ST; 19938889Sjdp#endif 20060484Sobrien#ifdef DOUBLEBAR_PARALLEL 20138889Sjdp lex['|'] = LEX_IS_DOUBLEBAR_1ST; 20238889Sjdp#endif 20360484Sobrien#ifdef TC_D30V 20460484Sobrien /* must do this is we want VLIW instruction with "->" or "<-" */ 20560484Sobrien lex['-'] = LEX_IS_SYMBOL_COMPONENT; 20660484Sobrien#endif 20733965Sjdp} /* do_scrub_begin() */ 20833965Sjdp 20933965Sjdp/* Saved state of the scrubber */ 21033965Sjdpstatic int state; 21133965Sjdpstatic int old_state; 21233965Sjdpstatic char *out_string; 21333965Sjdpstatic char out_buf[20]; 21433965Sjdpstatic int add_newlines; 21533965Sjdpstatic char *saved_input; 21633965Sjdpstatic int saved_input_len; 21760484Sobrienstatic char input_buffer[32 * 1024]; 21833965Sjdpstatic const char *mri_state; 21933965Sjdpstatic char mri_last_ch; 22033965Sjdp 22133965Sjdp/* Data structure for saving the state of app across #include's. Note that 22233965Sjdp app is called asynchronously to the parsing of the .include's, so our 22333965Sjdp state at the time .include is interpreted is completely unrelated. 22433965Sjdp That's why we have to save it all. */ 22533965Sjdp 22677298Sobrienstruct app_save { 22777298Sobrien int state; 22877298Sobrien int old_state; 22977298Sobrien char * out_string; 23077298Sobrien char out_buf[sizeof (out_buf)]; 23177298Sobrien int add_newlines; 23277298Sobrien char * saved_input; 23377298Sobrien int saved_input_len; 23460484Sobrien#ifdef TC_M68K 23577298Sobrien int scrub_m68k_mri; 23660484Sobrien#endif 23777298Sobrien const char * mri_state; 23877298Sobrien char mri_last_ch; 23960484Sobrien#if defined TC_ARM && defined OBJ_ELF 24077298Sobrien const char * symver_state; 24160484Sobrien#endif 24277298Sobrien}; 24333965Sjdp 24433965Sjdpchar * 24533965Sjdpapp_push () 24633965Sjdp{ 24733965Sjdp register struct app_save *saved; 24833965Sjdp 24933965Sjdp saved = (struct app_save *) xmalloc (sizeof (*saved)); 25033965Sjdp saved->state = state; 25133965Sjdp saved->old_state = old_state; 25233965Sjdp saved->out_string = out_string; 25333965Sjdp memcpy (saved->out_buf, out_buf, sizeof (out_buf)); 25433965Sjdp saved->add_newlines = add_newlines; 25560484Sobrien if (saved_input == NULL) 25660484Sobrien saved->saved_input = NULL; 25760484Sobrien else 25860484Sobrien { 25960484Sobrien saved->saved_input = xmalloc (saved_input_len); 26060484Sobrien memcpy (saved->saved_input, saved_input, saved_input_len); 26160484Sobrien saved->saved_input_len = saved_input_len; 26260484Sobrien } 26360484Sobrien#ifdef TC_M68K 26433965Sjdp saved->scrub_m68k_mri = scrub_m68k_mri; 26560484Sobrien#endif 26633965Sjdp saved->mri_state = mri_state; 26733965Sjdp saved->mri_last_ch = mri_last_ch; 26860484Sobrien#if defined TC_ARM && defined OBJ_ELF 26960484Sobrien saved->symver_state = symver_state; 27060484Sobrien#endif 27133965Sjdp 27277298Sobrien /* do_scrub_begin() is not useful, just wastes time. */ 27333965Sjdp 27433965Sjdp state = 0; 27533965Sjdp saved_input = NULL; 27633965Sjdp 27733965Sjdp return (char *) saved; 27833965Sjdp} 27933965Sjdp 28077298Sobrienvoid 28133965Sjdpapp_pop (arg) 28233965Sjdp char *arg; 28333965Sjdp{ 28433965Sjdp register struct app_save *saved = (struct app_save *) arg; 28533965Sjdp 28677298Sobrien /* There is no do_scrub_end (). */ 28733965Sjdp state = saved->state; 28833965Sjdp old_state = saved->old_state; 28933965Sjdp out_string = saved->out_string; 29033965Sjdp memcpy (out_buf, saved->out_buf, sizeof (out_buf)); 29133965Sjdp add_newlines = saved->add_newlines; 29260484Sobrien if (saved->saved_input == NULL) 29360484Sobrien saved_input = NULL; 29460484Sobrien else 29560484Sobrien { 29660484Sobrien assert (saved->saved_input_len <= (int) (sizeof input_buffer)); 29760484Sobrien memcpy (input_buffer, saved->saved_input, saved->saved_input_len); 29860484Sobrien saved_input = input_buffer; 29960484Sobrien saved_input_len = saved->saved_input_len; 30060484Sobrien free (saved->saved_input); 30160484Sobrien } 30260484Sobrien#ifdef TC_M68K 30333965Sjdp scrub_m68k_mri = saved->scrub_m68k_mri; 30460484Sobrien#endif 30533965Sjdp mri_state = saved->mri_state; 30633965Sjdp mri_last_ch = saved->mri_last_ch; 30760484Sobrien#if defined TC_ARM && defined OBJ_ELF 30860484Sobrien symver_state = saved->symver_state; 30960484Sobrien#endif 31033965Sjdp 31133965Sjdp free (arg); 31233965Sjdp} /* app_pop() */ 31333965Sjdp 31433965Sjdp/* @@ This assumes that \n &c are the same on host and target. This is not 31533965Sjdp necessarily true. */ 31677298Sobrienstatic int 31733965Sjdpprocess_escape (ch) 31833965Sjdp int ch; 31933965Sjdp{ 32033965Sjdp switch (ch) 32133965Sjdp { 32233965Sjdp case 'b': 32333965Sjdp return '\b'; 32433965Sjdp case 'f': 32533965Sjdp return '\f'; 32633965Sjdp case 'n': 32733965Sjdp return '\n'; 32833965Sjdp case 'r': 32933965Sjdp return '\r'; 33033965Sjdp case 't': 33133965Sjdp return '\t'; 33233965Sjdp case '\'': 33333965Sjdp return '\''; 33433965Sjdp case '"': 33533965Sjdp return '\"'; 33633965Sjdp default: 33733965Sjdp return ch; 33833965Sjdp } 33933965Sjdp} 34033965Sjdp 34133965Sjdp/* This function is called to process input characters. The GET 34233965Sjdp parameter is used to retrieve more input characters. GET should 34333965Sjdp set its parameter to point to a buffer, and return the length of 34433965Sjdp the buffer; it should return 0 at end of file. The scrubbed output 34533965Sjdp characters are put into the buffer starting at TOSTART; the TOSTART 34633965Sjdp buffer is TOLEN bytes in length. The function returns the number 34733965Sjdp of scrubbed characters put into TOSTART. This will be TOLEN unless 34833965Sjdp end of file was seen. This function is arranged as a state 34933965Sjdp machine, and saves its state so that it may return at any point. 35033965Sjdp This is the way the old code used to work. */ 35133965Sjdp 35233965Sjdpint 35333965Sjdpdo_scrub_chars (get, tostart, tolen) 35460484Sobrien int (*get) PARAMS ((char *, int)); 35533965Sjdp char *tostart; 35633965Sjdp int tolen; 35733965Sjdp{ 35833965Sjdp char *to = tostart; 35933965Sjdp char *toend = tostart + tolen; 36033965Sjdp char *from; 36133965Sjdp char *fromend; 36233965Sjdp int fromlen; 36333965Sjdp register int ch, ch2 = 0; 36433965Sjdp 36533965Sjdp /*State 0: beginning of normal line 36633965Sjdp 1: After first whitespace on line (flush more white) 36733965Sjdp 2: After first non-white (opcode) on line (keep 1white) 36833965Sjdp 3: after second white on line (into operands) (flush white) 36933965Sjdp 4: after putting out a .line, put out digits 37033965Sjdp 5: parsing a string, then go to old-state 37133965Sjdp 6: putting out \ escape in a "d string. 37233965Sjdp 7: After putting out a .appfile, put out string. 37333965Sjdp 8: After putting out a .appfile string, flush until newline. 37433965Sjdp 9: After seeing symbol char in state 3 (keep 1white after symchar) 37533965Sjdp 10: After seeing whitespace in state 9 (keep white before symchar) 37633965Sjdp 11: After seeing a symbol character in state 0 (eg a label definition) 37733965Sjdp -1: output string in out_string and go to the state in old_state 37833965Sjdp -2: flush text until a '*' '/' is seen, then go to state old_state 37938889Sjdp#ifdef TC_V850 38038889Sjdp 12: After seeing a dash, looking for a second dash as a start of comment. 38138889Sjdp#endif 38260484Sobrien#ifdef DOUBLEBAR_PARALLEL 38338889Sjdp 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator. 38438889Sjdp#endif 38533965Sjdp */ 38633965Sjdp 38733965Sjdp /* I added states 9 and 10 because the MIPS ECOFF assembler uses 38833965Sjdp constructs like ``.loc 1 20''. This was turning into ``.loc 38933965Sjdp 120''. States 9 and 10 ensure that a space is never dropped in 39089857Sobrien between characters which could appear in an identifier. Ian 39133965Sjdp Taylor, ian@cygnus.com. 39233965Sjdp 39333965Sjdp I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works 39433965Sjdp correctly on the PA (and any other target where colons are optional). 39538889Sjdp Jeff Law, law@cs.utah.edu. 39633965Sjdp 39738889Sjdp I added state 13 so that something like "cmp r1, r2 || trap #1" does not 39838889Sjdp get squashed into "cmp r1,r2||trap#1", with the all important space 39938889Sjdp between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */ 40038889Sjdp 40133965Sjdp /* This macro gets the next input character. */ 40233965Sjdp 40360484Sobrien#define GET() \ 40460484Sobrien (from < fromend \ 40560484Sobrien ? * (unsigned char *) (from++) \ 40660484Sobrien : (saved_input = NULL, \ 40760484Sobrien fromlen = (*get) (input_buffer, sizeof input_buffer), \ 40860484Sobrien from = input_buffer, \ 40960484Sobrien fromend = from + fromlen, \ 41060484Sobrien (fromlen == 0 \ 41160484Sobrien ? EOF \ 41260484Sobrien : * (unsigned char *) (from++)))) 41333965Sjdp 41433965Sjdp /* This macro pushes a character back on the input stream. */ 41533965Sjdp 41633965Sjdp#define UNGET(uch) (*--from = (uch)) 41733965Sjdp 41833965Sjdp /* This macro puts a character into the output buffer. If this 41933965Sjdp character fills the output buffer, this macro jumps to the label 42033965Sjdp TOFULL. We use this rather ugly approach because we need to 42133965Sjdp handle two different termination conditions: EOF on the input 42233965Sjdp stream, and a full output buffer. It would be simpler if we 42333965Sjdp always read in the entire input stream before processing it, but 42433965Sjdp I don't want to make such a significant change to the assembler's 42533965Sjdp memory usage. */ 42633965Sjdp 42733965Sjdp#define PUT(pch) \ 42833965Sjdp do \ 42933965Sjdp { \ 43033965Sjdp *to++ = (pch); \ 43133965Sjdp if (to >= toend) \ 43233965Sjdp goto tofull; \ 43333965Sjdp } \ 43433965Sjdp while (0) 43533965Sjdp 43633965Sjdp if (saved_input != NULL) 43733965Sjdp { 43833965Sjdp from = saved_input; 43933965Sjdp fromend = from + saved_input_len; 44033965Sjdp } 44133965Sjdp else 44233965Sjdp { 44360484Sobrien fromlen = (*get) (input_buffer, sizeof input_buffer); 44433965Sjdp if (fromlen == 0) 44533965Sjdp return 0; 44660484Sobrien from = input_buffer; 44733965Sjdp fromend = from + fromlen; 44833965Sjdp } 44933965Sjdp 45033965Sjdp while (1) 45133965Sjdp { 45233965Sjdp /* The cases in this switch end with continue, in order to 45333965Sjdp branch back to the top of this while loop and generate the 45433965Sjdp next output character in the appropriate state. */ 45533965Sjdp switch (state) 45633965Sjdp { 45733965Sjdp case -1: 45833965Sjdp ch = *out_string++; 45933965Sjdp if (*out_string == '\0') 46033965Sjdp { 46133965Sjdp state = old_state; 46233965Sjdp old_state = 3; 46333965Sjdp } 46433965Sjdp PUT (ch); 46533965Sjdp continue; 46633965Sjdp 46733965Sjdp case -2: 46833965Sjdp for (;;) 46933965Sjdp { 47033965Sjdp do 47133965Sjdp { 47233965Sjdp ch = GET (); 47333965Sjdp 47433965Sjdp if (ch == EOF) 47533965Sjdp { 47660484Sobrien as_warn (_("end of file in comment")); 47733965Sjdp goto fromeof; 47833965Sjdp } 47933965Sjdp 48033965Sjdp if (ch == '\n') 48133965Sjdp PUT ('\n'); 48233965Sjdp } 48333965Sjdp while (ch != '*'); 48433965Sjdp 48533965Sjdp while ((ch = GET ()) == '*') 48633965Sjdp ; 48733965Sjdp 48833965Sjdp if (ch == EOF) 48933965Sjdp { 49060484Sobrien as_warn (_("end of file in comment")); 49133965Sjdp goto fromeof; 49233965Sjdp } 49333965Sjdp 49433965Sjdp if (ch == '/') 49533965Sjdp break; 49633965Sjdp 49733965Sjdp UNGET (ch); 49833965Sjdp } 49933965Sjdp 50033965Sjdp state = old_state; 50133965Sjdp UNGET (' '); 50233965Sjdp continue; 50333965Sjdp 50433965Sjdp case 4: 50533965Sjdp ch = GET (); 50633965Sjdp if (ch == EOF) 50733965Sjdp goto fromeof; 50833965Sjdp else if (ch >= '0' && ch <= '9') 50933965Sjdp PUT (ch); 51033965Sjdp else 51133965Sjdp { 51233965Sjdp while (ch != EOF && IS_WHITESPACE (ch)) 51333965Sjdp ch = GET (); 51433965Sjdp if (ch == '"') 51533965Sjdp { 51633965Sjdp UNGET (ch); 51733965Sjdp if (scrub_m68k_mri) 51833965Sjdp out_string = "\n\tappfile "; 51933965Sjdp else 52033965Sjdp out_string = "\n\t.appfile "; 52133965Sjdp old_state = 7; 52233965Sjdp state = -1; 52333965Sjdp PUT (*out_string++); 52433965Sjdp } 52533965Sjdp else 52633965Sjdp { 52733965Sjdp while (ch != EOF && ch != '\n') 52833965Sjdp ch = GET (); 52933965Sjdp state = 0; 53033965Sjdp PUT (ch); 53133965Sjdp } 53233965Sjdp } 53333965Sjdp continue; 53433965Sjdp 53533965Sjdp case 5: 53633965Sjdp /* We are going to copy everything up to a quote character, 53733965Sjdp with special handling for a backslash. We try to 53833965Sjdp optimize the copying in the simple case without using the 53933965Sjdp GET and PUT macros. */ 54033965Sjdp { 54133965Sjdp char *s; 54233965Sjdp int len; 54333965Sjdp 54433965Sjdp for (s = from; s < fromend; s++) 54533965Sjdp { 54633965Sjdp ch = *s; 54733965Sjdp /* This condition must be changed if the type of any 54833965Sjdp other character can be LEX_IS_STRINGQUOTE. */ 54933965Sjdp if (ch == '\\' 55033965Sjdp || ch == '"' 55133965Sjdp || ch == '\'' 55233965Sjdp || ch == '\n') 55333965Sjdp break; 55433965Sjdp } 55533965Sjdp len = s - from; 55633965Sjdp if (len > toend - to) 55733965Sjdp len = toend - to; 55833965Sjdp if (len > 0) 55933965Sjdp { 56033965Sjdp memcpy (to, from, len); 56133965Sjdp to += len; 56233965Sjdp from += len; 56333965Sjdp } 56433965Sjdp } 56533965Sjdp 56633965Sjdp ch = GET (); 56733965Sjdp if (ch == EOF) 56833965Sjdp { 56989857Sobrien as_warn (_("end of file in string; inserted '\"'")); 57033965Sjdp state = old_state; 57133965Sjdp UNGET ('\n'); 57233965Sjdp PUT ('"'); 57333965Sjdp } 57433965Sjdp else if (lex[ch] == LEX_IS_STRINGQUOTE) 57533965Sjdp { 57633965Sjdp state = old_state; 57733965Sjdp PUT (ch); 57833965Sjdp } 57933965Sjdp#ifndef NO_STRING_ESCAPES 58033965Sjdp else if (ch == '\\') 58133965Sjdp { 58233965Sjdp state = 6; 58333965Sjdp PUT (ch); 58433965Sjdp } 58533965Sjdp#endif 58633965Sjdp else if (scrub_m68k_mri && ch == '\n') 58733965Sjdp { 58833965Sjdp /* Just quietly terminate the string. This permits lines like 58933965Sjdp bne label loop if we haven't reach end yet 59033965Sjdp */ 59133965Sjdp state = old_state; 59233965Sjdp UNGET (ch); 59333965Sjdp PUT ('\''); 59433965Sjdp } 59533965Sjdp else 59633965Sjdp { 59733965Sjdp PUT (ch); 59833965Sjdp } 59933965Sjdp continue; 60033965Sjdp 60133965Sjdp case 6: 60233965Sjdp state = 5; 60333965Sjdp ch = GET (); 60433965Sjdp switch (ch) 60533965Sjdp { 60633965Sjdp /* Handle strings broken across lines, by turning '\n' into 60733965Sjdp '\\' and 'n'. */ 60833965Sjdp case '\n': 60933965Sjdp UNGET ('n'); 61033965Sjdp add_newlines++; 61133965Sjdp PUT ('\\'); 61233965Sjdp continue; 61333965Sjdp 61433965Sjdp case '"': 61533965Sjdp case '\\': 61633965Sjdp case 'b': 61733965Sjdp case 'f': 61833965Sjdp case 'n': 61933965Sjdp case 'r': 62033965Sjdp case 't': 62133965Sjdp case 'v': 62233965Sjdp case 'x': 62333965Sjdp case 'X': 62433965Sjdp case '0': 62533965Sjdp case '1': 62633965Sjdp case '2': 62733965Sjdp case '3': 62833965Sjdp case '4': 62933965Sjdp case '5': 63033965Sjdp case '6': 63133965Sjdp case '7': 63233965Sjdp break; 63333965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) 63433965Sjdp default: 63589857Sobrien as_warn (_("unknown escape '\\%c' in string; ignored"), ch); 63633965Sjdp break; 63733965Sjdp#else /* ONLY_STANDARD_ESCAPES */ 63833965Sjdp default: 63933965Sjdp /* Accept \x as x for any x */ 64033965Sjdp break; 64133965Sjdp#endif /* ONLY_STANDARD_ESCAPES */ 64233965Sjdp 64333965Sjdp case EOF: 64489857Sobrien as_warn (_("end of file in string; '\"' inserted")); 64533965Sjdp PUT ('"'); 64633965Sjdp continue; 64733965Sjdp } 64833965Sjdp PUT (ch); 64933965Sjdp continue; 65033965Sjdp 65133965Sjdp case 7: 65233965Sjdp ch = GET (); 65333965Sjdp state = 5; 65433965Sjdp old_state = 8; 65533965Sjdp if (ch == EOF) 65633965Sjdp goto fromeof; 65733965Sjdp PUT (ch); 65833965Sjdp continue; 65933965Sjdp 66033965Sjdp case 8: 66133965Sjdp do 66233965Sjdp ch = GET (); 66333965Sjdp while (ch != '\n' && ch != EOF); 66433965Sjdp if (ch == EOF) 66533965Sjdp goto fromeof; 66633965Sjdp state = 0; 66733965Sjdp PUT (ch); 66833965Sjdp continue; 66933965Sjdp } 67033965Sjdp 67133965Sjdp /* OK, we are somewhere in states 0 through 4 or 9 through 11 */ 67233965Sjdp 67333965Sjdp /* flushchar: */ 67433965Sjdp ch = GET (); 67533965Sjdp 67633965Sjdp recycle: 67733965Sjdp 67860484Sobrien#if defined TC_ARM && defined OBJ_ELF 67960484Sobrien /* We need to watch out for .symver directives. See the comment later 68060484Sobrien in this function. */ 68160484Sobrien if (symver_state == NULL) 68260484Sobrien { 68360484Sobrien if ((state == 0 || state == 1) && ch == symver_pseudo[0]) 68460484Sobrien symver_state = symver_pseudo + 1; 68560484Sobrien } 68660484Sobrien else 68760484Sobrien { 68860484Sobrien /* We advance to the next state if we find the right 68960484Sobrien character. */ 69060484Sobrien if (ch != '\0' && (*symver_state == ch)) 69160484Sobrien ++symver_state; 69260484Sobrien else if (*symver_state != '\0') 69360484Sobrien /* We did not get the expected character, or we didn't 69460484Sobrien get a valid terminating character after seeing the 69560484Sobrien entire pseudo-op, so we must go back to the beginning. */ 69660484Sobrien symver_state = NULL; 69760484Sobrien else 69860484Sobrien { 69960484Sobrien /* We've read the entire pseudo-op. If this is the end 70060484Sobrien of the line, go back to the beginning. */ 70160484Sobrien if (IS_NEWLINE (ch)) 70260484Sobrien symver_state = NULL; 70360484Sobrien } 70460484Sobrien } 70560484Sobrien#endif /* TC_ARM && OBJ_ELF */ 70660484Sobrien 70733965Sjdp#ifdef TC_M68K 70833965Sjdp /* We want to have pseudo-ops which control whether we are in 70933965Sjdp MRI mode or not. Unfortunately, since m68k MRI mode affects 71033965Sjdp the scrubber, that means that we need a special purpose 71133965Sjdp recognizer here. */ 71233965Sjdp if (mri_state == NULL) 71333965Sjdp { 71433965Sjdp if ((state == 0 || state == 1) 71533965Sjdp && ch == mri_pseudo[0]) 71633965Sjdp mri_state = mri_pseudo + 1; 71733965Sjdp } 71833965Sjdp else 71933965Sjdp { 72033965Sjdp /* We advance to the next state if we find the right 72133965Sjdp character, or if we need a space character and we get any 72233965Sjdp whitespace character, or if we need a '0' and we get a 72333965Sjdp '1' (this is so that we only need one state to handle 72433965Sjdp ``.mri 0'' and ``.mri 1''). */ 72533965Sjdp if (ch != '\0' 72633965Sjdp && (*mri_state == ch 72733965Sjdp || (*mri_state == ' ' 72833965Sjdp && lex[ch] == LEX_IS_WHITESPACE) 72933965Sjdp || (*mri_state == '0' 73033965Sjdp && ch == '1'))) 73133965Sjdp { 73233965Sjdp mri_last_ch = ch; 73333965Sjdp ++mri_state; 73433965Sjdp } 73533965Sjdp else if (*mri_state != '\0' 73633965Sjdp || (lex[ch] != LEX_IS_WHITESPACE 73733965Sjdp && lex[ch] != LEX_IS_NEWLINE)) 73833965Sjdp { 73933965Sjdp /* We did not get the expected character, or we didn't 74033965Sjdp get a valid terminating character after seeing the 74133965Sjdp entire pseudo-op, so we must go back to the 74233965Sjdp beginning. */ 74333965Sjdp mri_state = NULL; 74433965Sjdp } 74533965Sjdp else 74633965Sjdp { 74733965Sjdp /* We've read the entire pseudo-op. mips_last_ch is 74833965Sjdp either '0' or '1' indicating whether to enter or 74933965Sjdp leave MRI mode. */ 75033965Sjdp do_scrub_begin (mri_last_ch == '1'); 75138889Sjdp mri_state = NULL; 75233965Sjdp 75333965Sjdp /* We continue handling the character as usual. The 75433965Sjdp main gas reader must also handle the .mri pseudo-op 75533965Sjdp to control expression parsing and the like. */ 75633965Sjdp } 75733965Sjdp } 75833965Sjdp#endif 75933965Sjdp 76033965Sjdp if (ch == EOF) 76133965Sjdp { 76233965Sjdp if (state != 0) 76333965Sjdp { 76460484Sobrien as_warn (_("end of file not at end of a line; newline inserted")); 76533965Sjdp state = 0; 76633965Sjdp PUT ('\n'); 76733965Sjdp } 76833965Sjdp goto fromeof; 76933965Sjdp } 77033965Sjdp 77133965Sjdp switch (lex[ch]) 77233965Sjdp { 77333965Sjdp case LEX_IS_WHITESPACE: 77433965Sjdp do 77533965Sjdp { 77633965Sjdp ch = GET (); 77733965Sjdp } 77833965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 77933965Sjdp if (ch == EOF) 78033965Sjdp goto fromeof; 78133965Sjdp 78233965Sjdp if (state == 0) 78333965Sjdp { 78433965Sjdp /* Preserve a single whitespace character at the 78533965Sjdp beginning of a line. */ 78633965Sjdp state = 1; 78733965Sjdp UNGET (ch); 78833965Sjdp PUT (' '); 78933965Sjdp break; 79033965Sjdp } 79133965Sjdp 79260484Sobrien#ifdef KEEP_WHITE_AROUND_COLON 79377298Sobrien if (lex[ch] == LEX_IS_COLON) 79477298Sobrien { 79577298Sobrien /* Only keep this white if there's no white *after* the 79677298Sobrien colon. */ 79777298Sobrien ch2 = GET (); 79877298Sobrien UNGET (ch2); 79977298Sobrien if (!IS_WHITESPACE (ch2)) 80077298Sobrien { 80177298Sobrien state = 9; 80277298Sobrien UNGET (ch); 80377298Sobrien PUT (' '); 80477298Sobrien break; 80577298Sobrien } 80677298Sobrien } 80760484Sobrien#endif 80833965Sjdp if (IS_COMMENT (ch) 80933965Sjdp || ch == '/' 81089857Sobrien || IS_LINE_SEPARATOR (ch) 81189857Sobrien || IS_PARALLEL_SEPARATOR (ch)) 81233965Sjdp { 81333965Sjdp if (scrub_m68k_mri) 81433965Sjdp { 81533965Sjdp /* In MRI mode, we keep these spaces. */ 81633965Sjdp UNGET (ch); 81733965Sjdp PUT (' '); 81833965Sjdp break; 81933965Sjdp } 82033965Sjdp goto recycle; 82133965Sjdp } 82233965Sjdp 82333965Sjdp /* If we're in state 2 or 11, we've seen a non-white 82433965Sjdp character followed by whitespace. If the next character 82533965Sjdp is ':', this is whitespace after a label name which we 82633965Sjdp normally must ignore. In MRI mode, though, spaces are 82733965Sjdp not permitted between the label and the colon. */ 82833965Sjdp if ((state == 2 || state == 11) 82933965Sjdp && lex[ch] == LEX_IS_COLON 83033965Sjdp && ! scrub_m68k_mri) 83133965Sjdp { 83233965Sjdp state = 1; 83333965Sjdp PUT (ch); 83433965Sjdp break; 83533965Sjdp } 83633965Sjdp 83733965Sjdp switch (state) 83833965Sjdp { 83933965Sjdp case 0: 84033965Sjdp state++; 84133965Sjdp goto recycle; /* Punted leading sp */ 84233965Sjdp case 1: 84333965Sjdp /* We can arrive here if we leave a leading whitespace 84433965Sjdp character at the beginning of a line. */ 84533965Sjdp goto recycle; 84633965Sjdp case 2: 84733965Sjdp state = 3; 84833965Sjdp if (to + 1 < toend) 84933965Sjdp { 85033965Sjdp /* Optimize common case by skipping UNGET/GET. */ 85133965Sjdp PUT (' '); /* Sp after opco */ 85233965Sjdp goto recycle; 85333965Sjdp } 85433965Sjdp UNGET (ch); 85533965Sjdp PUT (' '); 85633965Sjdp break; 85733965Sjdp case 3: 85833965Sjdp if (scrub_m68k_mri) 85933965Sjdp { 86033965Sjdp /* In MRI mode, we keep these spaces. */ 86133965Sjdp UNGET (ch); 86233965Sjdp PUT (' '); 86333965Sjdp break; 86433965Sjdp } 86533965Sjdp goto recycle; /* Sp in operands */ 86633965Sjdp case 9: 86733965Sjdp case 10: 86833965Sjdp if (scrub_m68k_mri) 86933965Sjdp { 87033965Sjdp /* In MRI mode, we keep these spaces. */ 87133965Sjdp state = 3; 87233965Sjdp UNGET (ch); 87333965Sjdp PUT (' '); 87433965Sjdp break; 87533965Sjdp } 87633965Sjdp state = 10; /* Sp after symbol char */ 87733965Sjdp goto recycle; 87833965Sjdp case 11: 87960484Sobrien if (LABELS_WITHOUT_COLONS || flag_m68k_mri) 88033965Sjdp state = 1; 88133965Sjdp else 88233965Sjdp { 88333965Sjdp /* We know that ch is not ':', since we tested that 88433965Sjdp case above. Therefore this is not a label, so it 88533965Sjdp must be the opcode, and we've just seen the 88633965Sjdp whitespace after it. */ 88733965Sjdp state = 3; 88833965Sjdp } 88933965Sjdp UNGET (ch); 89033965Sjdp PUT (' '); /* Sp after label definition. */ 89133965Sjdp break; 89233965Sjdp default: 89333965Sjdp BAD_CASE (state); 89433965Sjdp } 89533965Sjdp break; 89633965Sjdp 89733965Sjdp case LEX_IS_TWOCHAR_COMMENT_1ST: 89833965Sjdp ch2 = GET (); 89933965Sjdp if (ch2 == '*') 90033965Sjdp { 90133965Sjdp for (;;) 90233965Sjdp { 90333965Sjdp do 90433965Sjdp { 90533965Sjdp ch2 = GET (); 90633965Sjdp if (ch2 != EOF && IS_NEWLINE (ch2)) 90733965Sjdp add_newlines++; 90833965Sjdp } 90933965Sjdp while (ch2 != EOF && ch2 != '*'); 91033965Sjdp 91133965Sjdp while (ch2 == '*') 91233965Sjdp ch2 = GET (); 91333965Sjdp 91433965Sjdp if (ch2 == EOF || ch2 == '/') 91533965Sjdp break; 91633965Sjdp 91733965Sjdp /* This UNGET will ensure that we count newlines 91833965Sjdp correctly. */ 91933965Sjdp UNGET (ch2); 92033965Sjdp } 92133965Sjdp 92233965Sjdp if (ch2 == EOF) 92360484Sobrien as_warn (_("end of file in multiline comment")); 92433965Sjdp 92533965Sjdp ch = ' '; 92633965Sjdp goto recycle; 92733965Sjdp } 92877298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS 92977298Sobrien else if (ch2 == '/') 93077298Sobrien { 93177298Sobrien do 93277298Sobrien { 93377298Sobrien ch = GET (); 93477298Sobrien } 93577298Sobrien while (ch != EOF && !IS_NEWLINE (ch)); 93677298Sobrien if (ch == EOF) 93777298Sobrien as_warn ("end of file in comment; newline inserted"); 93877298Sobrien state = 0; 93977298Sobrien PUT ('\n'); 94077298Sobrien break; 94177298Sobrien } 94277298Sobrien#endif 94333965Sjdp else 94433965Sjdp { 94533965Sjdp if (ch2 != EOF) 94633965Sjdp UNGET (ch2); 94733965Sjdp if (state == 9 || state == 10) 94833965Sjdp state = 3; 94933965Sjdp PUT (ch); 95033965Sjdp } 95133965Sjdp break; 95233965Sjdp 95333965Sjdp case LEX_IS_STRINGQUOTE: 95433965Sjdp if (state == 10) 95533965Sjdp { 95633965Sjdp /* Preserve the whitespace in foo "bar" */ 95733965Sjdp UNGET (ch); 95833965Sjdp state = 3; 95933965Sjdp PUT (' '); 96033965Sjdp 96133965Sjdp /* PUT didn't jump out. We could just break, but we 96233965Sjdp know what will happen, so optimize a bit. */ 96333965Sjdp ch = GET (); 96433965Sjdp old_state = 3; 96533965Sjdp } 96633965Sjdp else if (state == 9) 96733965Sjdp old_state = 3; 96833965Sjdp else 96933965Sjdp old_state = state; 97033965Sjdp state = 5; 97133965Sjdp PUT (ch); 97233965Sjdp break; 97333965Sjdp 97433965Sjdp#ifndef IEEE_STYLE 97533965Sjdp case LEX_IS_ONECHAR_QUOTE: 97633965Sjdp if (state == 10) 97733965Sjdp { 97833965Sjdp /* Preserve the whitespace in foo 'b' */ 97933965Sjdp UNGET (ch); 98033965Sjdp state = 3; 98133965Sjdp PUT (' '); 98233965Sjdp break; 98333965Sjdp } 98433965Sjdp ch = GET (); 98533965Sjdp if (ch == EOF) 98633965Sjdp { 98760484Sobrien as_warn (_("end of file after a one-character quote; \\0 inserted")); 98833965Sjdp ch = 0; 98933965Sjdp } 99033965Sjdp if (ch == '\\') 99133965Sjdp { 99233965Sjdp ch = GET (); 99333965Sjdp if (ch == EOF) 99433965Sjdp { 99560484Sobrien as_warn (_("end of file in escape character")); 99633965Sjdp ch = '\\'; 99733965Sjdp } 99833965Sjdp else 99933965Sjdp ch = process_escape (ch); 100033965Sjdp } 100133965Sjdp sprintf (out_buf, "%d", (int) (unsigned char) ch); 100233965Sjdp 100333965Sjdp /* None of these 'x constants for us. We want 'x'. */ 100433965Sjdp if ((ch = GET ()) != '\'') 100533965Sjdp { 100633965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE 100789857Sobrien as_warn (_("missing close quote; (assumed)")); 100833965Sjdp#else 100933965Sjdp if (ch != EOF) 101033965Sjdp UNGET (ch); 101133965Sjdp#endif 101233965Sjdp } 101333965Sjdp if (strlen (out_buf) == 1) 101433965Sjdp { 101533965Sjdp PUT (out_buf[0]); 101633965Sjdp break; 101733965Sjdp } 101833965Sjdp if (state == 9) 101933965Sjdp old_state = 3; 102033965Sjdp else 102133965Sjdp old_state = state; 102233965Sjdp state = -1; 102333965Sjdp out_string = out_buf; 102433965Sjdp PUT (*out_string++); 102533965Sjdp break; 102633965Sjdp#endif 102733965Sjdp 102833965Sjdp case LEX_IS_COLON: 102960484Sobrien#ifdef KEEP_WHITE_AROUND_COLON 103077298Sobrien state = 9; 103160484Sobrien#else 103233965Sjdp if (state == 9 || state == 10) 103333965Sjdp state = 3; 103433965Sjdp else if (state != 3) 103533965Sjdp state = 1; 103660484Sobrien#endif 103733965Sjdp PUT (ch); 103833965Sjdp break; 103933965Sjdp 104033965Sjdp case LEX_IS_NEWLINE: 104133965Sjdp /* Roll out a bunch of newlines from inside comments, etc. */ 104233965Sjdp if (add_newlines) 104333965Sjdp { 104433965Sjdp --add_newlines; 104533965Sjdp UNGET (ch); 104633965Sjdp } 104777298Sobrien /* Fall through. */ 104833965Sjdp 104933965Sjdp case LEX_IS_LINE_SEPARATOR: 105033965Sjdp state = 0; 105133965Sjdp PUT (ch); 105233965Sjdp break; 105333965Sjdp 105489857Sobrien case LEX_IS_PARALLEL_SEPARATOR: 105589857Sobrien state = 1; 105689857Sobrien PUT (ch); 105789857Sobrien break; 105889857Sobrien 105938889Sjdp#ifdef TC_V850 106038889Sjdp case LEX_IS_DOUBLEDASH_1ST: 106177298Sobrien ch2 = GET (); 106238889Sjdp if (ch2 != '-') 106338889Sjdp { 106438889Sjdp UNGET (ch2); 106538889Sjdp goto de_fault; 106638889Sjdp } 106777298Sobrien /* Read and skip to end of line. */ 106838889Sjdp do 106938889Sjdp { 107038889Sjdp ch = GET (); 107138889Sjdp } 107238889Sjdp while (ch != EOF && ch != '\n'); 107338889Sjdp if (ch == EOF) 107438889Sjdp { 107560484Sobrien as_warn (_("end of file in comment; newline inserted")); 107638889Sjdp } 107738889Sjdp state = 0; 107838889Sjdp PUT ('\n'); 107938889Sjdp break; 108077298Sobrien#endif 108160484Sobrien#ifdef DOUBLEBAR_PARALLEL 108238889Sjdp case LEX_IS_DOUBLEBAR_1ST: 108377298Sobrien ch2 = GET (); 108438889Sjdp if (ch2 != '|') 108538889Sjdp { 108638889Sjdp UNGET (ch2); 108738889Sjdp goto de_fault; 108838889Sjdp } 108938889Sjdp /* Reset back to state 1 and pretend that we are parsing a line from 109038889Sjdp just after the first white space. */ 109138889Sjdp state = 1; 109238889Sjdp PUT ('|'); 109338889Sjdp PUT ('|'); 109438889Sjdp break; 109577298Sobrien#endif 109633965Sjdp case LEX_IS_LINE_COMMENT_START: 109733965Sjdp /* FIXME-someday: The two character comment stuff was badly 109833965Sjdp thought out. On i386, we want '/' as line comment start 109933965Sjdp AND we want C style comments. hence this hack. The 110033965Sjdp whole lexical process should be reworked. xoxorich. */ 110133965Sjdp if (ch == '/') 110233965Sjdp { 110333965Sjdp ch2 = GET (); 110433965Sjdp if (ch2 == '*') 110533965Sjdp { 110633965Sjdp old_state = 3; 110733965Sjdp state = -2; 110833965Sjdp break; 110933965Sjdp } 111033965Sjdp else 111133965Sjdp { 111233965Sjdp UNGET (ch2); 111333965Sjdp } 111433965Sjdp } /* bad hack */ 111533965Sjdp 111633965Sjdp if (state == 0 || state == 1) /* Only comment at start of line. */ 111733965Sjdp { 111833965Sjdp int startch; 111933965Sjdp 112033965Sjdp startch = ch; 112133965Sjdp 112233965Sjdp do 112333965Sjdp { 112433965Sjdp ch = GET (); 112533965Sjdp } 112633965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 112733965Sjdp if (ch == EOF) 112833965Sjdp { 112960484Sobrien as_warn (_("end of file in comment; newline inserted")); 113033965Sjdp PUT ('\n'); 113133965Sjdp break; 113233965Sjdp } 113333965Sjdp if (ch < '0' || ch > '9' || state != 0 || startch != '#') 113433965Sjdp { 113533965Sjdp /* Not a cpp line. */ 113633965Sjdp while (ch != EOF && !IS_NEWLINE (ch)) 113733965Sjdp ch = GET (); 113833965Sjdp if (ch == EOF) 113989857Sobrien as_warn (_("end of file in comment; newline inserted")); 114033965Sjdp state = 0; 114133965Sjdp PUT ('\n'); 114233965Sjdp break; 114333965Sjdp } 114477298Sobrien /* Looks like `# 123 "filename"' from cpp. */ 114533965Sjdp UNGET (ch); 114633965Sjdp old_state = 4; 114733965Sjdp state = -1; 114833965Sjdp if (scrub_m68k_mri) 114933965Sjdp out_string = "\tappline "; 115033965Sjdp else 115133965Sjdp out_string = "\t.appline "; 115233965Sjdp PUT (*out_string++); 115333965Sjdp break; 115433965Sjdp } 115533965Sjdp 115638889Sjdp#ifdef TC_D10V 115738889Sjdp /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true. 115838889Sjdp Trap is the only short insn that has a first operand that is 115938889Sjdp neither register nor label. 116038889Sjdp We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 . 116177298Sobrien We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is 116277298Sobrien already LEX_IS_LINE_COMMENT_START. However, it is the 116377298Sobrien only character in line_comment_chars for d10v, hence we 116477298Sobrien can recognize it as such. */ 116538889Sjdp /* An alternative approach would be to reset the state to 1 when 116638889Sjdp we see '||', '<'- or '->', but that seems to be overkill. */ 116777298Sobrien if (state == 10) 116877298Sobrien PUT (' '); 116938889Sjdp#endif 117033965Sjdp /* We have a line comment character which is not at the 117133965Sjdp start of a line. If this is also a normal comment 117233965Sjdp character, fall through. Otherwise treat it as a default 117333965Sjdp character. */ 117433965Sjdp if (strchr (tc_comment_chars, ch) == NULL 117533965Sjdp && (! scrub_m68k_mri 117633965Sjdp || (ch != '!' && ch != '*'))) 117733965Sjdp goto de_fault; 117833965Sjdp if (scrub_m68k_mri 117933965Sjdp && (ch == '!' || ch == '*' || ch == '#') 118033965Sjdp && state != 1 118133965Sjdp && state != 10) 118233965Sjdp goto de_fault; 118333965Sjdp /* Fall through. */ 118433965Sjdp case LEX_IS_COMMENT_START: 118560484Sobrien#if defined TC_ARM && defined OBJ_ELF 118660484Sobrien /* On the ARM, `@' is the comment character. 118760484Sobrien Unfortunately this is also a special character in ELF .symver 118877298Sobrien directives (and .type, though we deal with those another way). 118977298Sobrien So we check if this line is such a directive, and treat 119077298Sobrien the character as default if so. This is a hack. */ 119160484Sobrien if ((symver_state != NULL) && (*symver_state == 0)) 119260484Sobrien goto de_fault; 119360484Sobrien#endif 119477298Sobrien#ifdef WARN_COMMENTS 119577298Sobrien if (!found_comment) 119677298Sobrien as_where (&found_comment_file, &found_comment); 119777298Sobrien#endif 119833965Sjdp do 119933965Sjdp { 120033965Sjdp ch = GET (); 120133965Sjdp } 120233965Sjdp while (ch != EOF && !IS_NEWLINE (ch)); 120333965Sjdp if (ch == EOF) 120460484Sobrien as_warn (_("end of file in comment; newline inserted")); 120533965Sjdp state = 0; 120633965Sjdp PUT ('\n'); 120733965Sjdp break; 120833965Sjdp 120933965Sjdp case LEX_IS_SYMBOL_COMPONENT: 121033965Sjdp if (state == 10) 121133965Sjdp { 121233965Sjdp /* This is a symbol character following another symbol 121333965Sjdp character, with whitespace in between. We skipped 121433965Sjdp the whitespace earlier, so output it now. */ 121533965Sjdp UNGET (ch); 121633965Sjdp state = 3; 121733965Sjdp PUT (' '); 121833965Sjdp break; 121933965Sjdp } 122033965Sjdp 122133965Sjdp if (state == 3) 122233965Sjdp state = 9; 122333965Sjdp 122433965Sjdp /* This is a common case. Quickly copy CH and all the 122533965Sjdp following symbol component or normal characters. */ 122660484Sobrien if (to + 1 < toend 122760484Sobrien && mri_state == NULL 122860484Sobrien#if defined TC_ARM && defined OBJ_ELF 122960484Sobrien && symver_state == NULL 123060484Sobrien#endif 123160484Sobrien ) 123233965Sjdp { 123333965Sjdp char *s; 123433965Sjdp int len; 123533965Sjdp 123633965Sjdp for (s = from; s < fromend; s++) 123733965Sjdp { 123833965Sjdp int type; 123933965Sjdp 124077298Sobrien ch2 = *(unsigned char *) s; 124133965Sjdp type = lex[ch2]; 124233965Sjdp if (type != 0 124333965Sjdp && type != LEX_IS_SYMBOL_COMPONENT) 124433965Sjdp break; 124533965Sjdp } 124633965Sjdp if (s > from) 124733965Sjdp { 124833965Sjdp /* Handle the last character normally, for 124933965Sjdp simplicity. */ 125033965Sjdp --s; 125133965Sjdp } 125233965Sjdp len = s - from; 125333965Sjdp if (len > (toend - to) - 1) 125433965Sjdp len = (toend - to) - 1; 125533965Sjdp if (len > 0) 125633965Sjdp { 125733965Sjdp PUT (ch); 125833965Sjdp if (len > 8) 125933965Sjdp { 126033965Sjdp memcpy (to, from, len); 126133965Sjdp to += len; 126233965Sjdp from += len; 126333965Sjdp } 126433965Sjdp else 126533965Sjdp { 126633965Sjdp switch (len) 126733965Sjdp { 126833965Sjdp case 8: *to++ = *from++; 126933965Sjdp case 7: *to++ = *from++; 127033965Sjdp case 6: *to++ = *from++; 127133965Sjdp case 5: *to++ = *from++; 127233965Sjdp case 4: *to++ = *from++; 127333965Sjdp case 3: *to++ = *from++; 127433965Sjdp case 2: *to++ = *from++; 127533965Sjdp case 1: *to++ = *from++; 127633965Sjdp } 127777298Sobrien } 127833965Sjdp ch = GET (); 127933965Sjdp } 128033965Sjdp } 128133965Sjdp 128233965Sjdp /* Fall through. */ 128333965Sjdp default: 128433965Sjdp de_fault: 128533965Sjdp /* Some relatively `normal' character. */ 128633965Sjdp if (state == 0) 128733965Sjdp { 128833965Sjdp state = 11; /* Now seeing label definition */ 128933965Sjdp } 129033965Sjdp else if (state == 1) 129133965Sjdp { 129233965Sjdp state = 2; /* Ditto */ 129333965Sjdp } 129433965Sjdp else if (state == 9) 129533965Sjdp { 129633965Sjdp if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) 129733965Sjdp state = 3; 129833965Sjdp } 129933965Sjdp else if (state == 10) 130033965Sjdp { 130160484Sobrien if (ch == '\\') 130260484Sobrien { 130360484Sobrien /* Special handling for backslash: a backslash may 130460484Sobrien be the beginning of a formal parameter (of a 130560484Sobrien macro) following another symbol character, with 130660484Sobrien whitespace in between. If that is the case, we 130760484Sobrien output a space before the parameter. Strictly 130860484Sobrien speaking, correct handling depends upon what the 130960484Sobrien macro parameter expands into; if the parameter 131060484Sobrien expands into something which does not start with 131160484Sobrien an operand character, then we don't want to keep 131260484Sobrien the space. We don't have enough information to 131360484Sobrien make the right choice, so here we are making the 131460484Sobrien choice which is more likely to be correct. */ 131560484Sobrien PUT (' '); 131660484Sobrien } 131760484Sobrien 131833965Sjdp state = 3; 131933965Sjdp } 132033965Sjdp PUT (ch); 132133965Sjdp break; 132233965Sjdp } 132333965Sjdp } 132433965Sjdp 132533965Sjdp /*NOTREACHED*/ 132633965Sjdp 132733965Sjdp fromeof: 132833965Sjdp /* We have reached the end of the input. */ 132933965Sjdp return to - tostart; 133033965Sjdp 133133965Sjdp tofull: 133233965Sjdp /* The output buffer is full. Save any input we have not yet 133333965Sjdp processed. */ 133433965Sjdp if (fromend > from) 133533965Sjdp { 133660484Sobrien saved_input = from; 133733965Sjdp saved_input_len = fromend - from; 133833965Sjdp } 133933965Sjdp else 134060484Sobrien saved_input = NULL; 134160484Sobrien 134233965Sjdp return to - tostart; 134333965Sjdp} 134433965Sjdp 134533965Sjdp/* end of app.c */ 1346