133965Sjdp/* This is the Assembler Pre-Processor 278828Sobrien Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 3218822Sdim 1999, 2000, 2001, 2002, 2003, 2006, 2007 433965Sjdp Free Software Foundation, Inc. 533965Sjdp 633965Sjdp This file is part of GAS, the GNU Assembler. 733965Sjdp 833965Sjdp GAS is free software; you can redistribute it and/or modify 933965Sjdp it under the terms of the GNU General Public License as published by 1033965Sjdp the Free Software Foundation; either version 2, or (at your option) 1133965Sjdp any later version. 1233965Sjdp 1333965Sjdp GAS is distributed in the hope that it will be useful, 1433965Sjdp but WITHOUT ANY WARRANTY; without even the implied warranty of 1533965Sjdp MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1633965Sjdp GNU General Public License for more details. 1733965Sjdp 1833965Sjdp You should have received a copy of the GNU General Public License 1933965Sjdp along with GAS; see the file COPYING. If not, write to the Free 20218822Sdim Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 21218822Sdim 02110-1301, USA. */ 2233965Sjdp 23130561Sobrien/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */ 24218822Sdim/* App, the assembler pre-processor. This pre-processor strips out 25218822Sdim excess spaces, turns single-quoted characters into a decimal 26218822Sdim constant, and turns the # in # <number> <filename> <garbage> into a 27218822Sdim .linefile. This needs better error-handling. */ 2833965Sjdp 29218822Sdim#include "as.h" 3033965Sjdp 3133965Sjdp#if (__STDC__ != 1) 3233965Sjdp#ifndef const 3333965Sjdp#define const /* empty */ 3433965Sjdp#endif 3533965Sjdp#endif 3633965Sjdp 3760484Sobrien#ifdef TC_M68K 3833965Sjdp/* Whether we are scrubbing in m68k MRI mode. This is different from 3933965Sjdp flag_m68k_mri, because the two flags will be affected by the .mri 4033965Sjdp pseudo-op at different times. */ 4133965Sjdpstatic int scrub_m68k_mri; 4233965Sjdp 4333965Sjdp/* The pseudo-op which switches in and out of MRI mode. See the 4433965Sjdp comment in do_scrub_chars. */ 4533965Sjdpstatic const char mri_pseudo[] = ".mri 0"; 46104834Sobrien#else 47104834Sobrien#define scrub_m68k_mri 0 48104834Sobrien#endif 4933965Sjdp 5060484Sobrien#if defined TC_ARM && defined OBJ_ELF 5177298Sobrien/* The pseudo-op for which we need to special-case `@' characters. 5260484Sobrien See the comment in do_scrub_chars. */ 5360484Sobrienstatic const char symver_pseudo[] = ".symver"; 5460484Sobrienstatic const char * symver_state; 5560484Sobrien#endif 5660484Sobrien 5733965Sjdpstatic char lex[256]; 5833965Sjdpstatic const char symbol_chars[] = 5933965Sjdp"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; 6033965Sjdp 6133965Sjdp#define LEX_IS_SYMBOL_COMPONENT 1 6233965Sjdp#define LEX_IS_WHITESPACE 2 6333965Sjdp#define LEX_IS_LINE_SEPARATOR 3 6433965Sjdp#define LEX_IS_COMMENT_START 4 6533965Sjdp#define LEX_IS_LINE_COMMENT_START 5 6633965Sjdp#define LEX_IS_TWOCHAR_COMMENT_1ST 6 6733965Sjdp#define LEX_IS_STRINGQUOTE 8 6833965Sjdp#define LEX_IS_COLON 9 6933965Sjdp#define LEX_IS_NEWLINE 10 7033965Sjdp#define LEX_IS_ONECHAR_QUOTE 11 7138889Sjdp#ifdef TC_V850 7238889Sjdp#define LEX_IS_DOUBLEDASH_1ST 12 7338889Sjdp#endif 7438889Sjdp#ifdef TC_M32R 7560484Sobrien#define DOUBLEBAR_PARALLEL 7660484Sobrien#endif 7760484Sobrien#ifdef DOUBLEBAR_PARALLEL 7838889Sjdp#define LEX_IS_DOUBLEBAR_1ST 13 7938889Sjdp#endif 8089857Sobrien#define LEX_IS_PARALLEL_SEPARATOR 14 8133965Sjdp#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) 8233965Sjdp#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE) 8333965Sjdp#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR) 8489857Sobrien#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR) 8533965Sjdp#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START) 8633965Sjdp#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START) 8733965Sjdp#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE) 8833965Sjdp 89130561Sobrienstatic int process_escape (int); 9033965Sjdp 9133965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be 9233965Sjdp built statically at compile time rather than dynamically 9377298Sobrien each and every time the assembler is run. xoxorich. */ 9433965Sjdp 9577298Sobrienvoid 96130561Sobriendo_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED) 9733965Sjdp{ 9833965Sjdp const char *p; 9960484Sobrien int c; 10033965Sjdp 10133965Sjdp lex[' '] = LEX_IS_WHITESPACE; 10233965Sjdp lex['\t'] = LEX_IS_WHITESPACE; 10338889Sjdp lex['\r'] = LEX_IS_WHITESPACE; 10433965Sjdp lex['\n'] = LEX_IS_NEWLINE; 10533965Sjdp lex[':'] = LEX_IS_COLON; 10633965Sjdp 10760484Sobrien#ifdef TC_M68K 10860484Sobrien scrub_m68k_mri = m68k_mri; 10960484Sobrien 11033965Sjdp if (! m68k_mri) 11160484Sobrien#endif 11233965Sjdp { 11333965Sjdp lex['"'] = LEX_IS_STRINGQUOTE; 11433965Sjdp 11560484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370) 116130561Sobrien /* I370 uses single-quotes to delimit integer, float constants. */ 11733965Sjdp lex['\''] = LEX_IS_ONECHAR_QUOTE; 11833965Sjdp#endif 11933965Sjdp 12033965Sjdp#ifdef SINGLE_QUOTE_STRINGS 12133965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 12233965Sjdp#endif 12333965Sjdp } 12433965Sjdp 12533965Sjdp /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop 12633965Sjdp in state 5 of do_scrub_chars must be changed. */ 12733965Sjdp 12833965Sjdp /* Note that these override the previous defaults, e.g. if ';' is a 12933965Sjdp comment char, then it isn't a line separator. */ 13033965Sjdp for (p = symbol_chars; *p; ++p) 131130561Sobrien lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 13233965Sjdp 13360484Sobrien for (c = 128; c < 256; ++c) 13460484Sobrien lex[c] = LEX_IS_SYMBOL_COMPONENT; 13560484Sobrien 13660484Sobrien#ifdef tc_symbol_chars 13760484Sobrien /* This macro permits the processor to specify all characters which 13860484Sobrien may appears in an operand. This will prevent the scrubber from 13960484Sobrien discarding meaningful whitespace in certain cases. The i386 14060484Sobrien backend uses this to support prefixes, which can confuse the 14160484Sobrien scrubber as to whether it is parsing operands or opcodes. */ 14260484Sobrien for (p = tc_symbol_chars; *p; ++p) 14360484Sobrien lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 14460484Sobrien#endif 14560484Sobrien 14633965Sjdp /* The m68k backend wants to be able to change comment_chars. */ 14733965Sjdp#ifndef tc_comment_chars 14833965Sjdp#define tc_comment_chars comment_chars 14933965Sjdp#endif 15033965Sjdp for (p = tc_comment_chars; *p; p++) 151130561Sobrien lex[(unsigned char) *p] = LEX_IS_COMMENT_START; 15233965Sjdp 15333965Sjdp for (p = line_comment_chars; *p; p++) 154130561Sobrien lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; 15533965Sjdp 15633965Sjdp for (p = line_separator_chars; *p; p++) 157130561Sobrien lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; 15833965Sjdp 15989857Sobrien#ifdef tc_parallel_separator_chars 16089857Sobrien /* This macro permits the processor to specify all characters which 16189857Sobrien separate parallel insns on the same line. */ 16289857Sobrien for (p = tc_parallel_separator_chars; *p; p++) 163130561Sobrien lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR; 16489857Sobrien#endif 16589857Sobrien 16633965Sjdp /* Only allow slash-star comments if slash is not in use. 16733965Sjdp FIXME: This isn't right. We should always permit them. */ 16833965Sjdp if (lex['/'] == 0) 169130561Sobrien lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; 17033965Sjdp 17160484Sobrien#ifdef TC_M68K 17233965Sjdp if (m68k_mri) 17333965Sjdp { 17433965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 17533965Sjdp lex[';'] = LEX_IS_COMMENT_START; 17633965Sjdp lex['*'] = LEX_IS_LINE_COMMENT_START; 17733965Sjdp /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but 178130561Sobrien then it can't be used in an expression. */ 17933965Sjdp lex['!'] = LEX_IS_LINE_COMMENT_START; 18033965Sjdp } 18160484Sobrien#endif 18238889Sjdp 18338889Sjdp#ifdef TC_V850 18438889Sjdp lex['-'] = LEX_IS_DOUBLEDASH_1ST; 18538889Sjdp#endif 18660484Sobrien#ifdef DOUBLEBAR_PARALLEL 18738889Sjdp lex['|'] = LEX_IS_DOUBLEBAR_1ST; 18838889Sjdp#endif 18960484Sobrien#ifdef TC_D30V 190130561Sobrien /* Must do this is we want VLIW instruction with "->" or "<-". */ 19160484Sobrien lex['-'] = LEX_IS_SYMBOL_COMPONENT; 19260484Sobrien#endif 193130561Sobrien} 19433965Sjdp 195130561Sobrien/* Saved state of the scrubber. */ 19633965Sjdpstatic int state; 19733965Sjdpstatic int old_state; 19833965Sjdpstatic char *out_string; 19933965Sjdpstatic char out_buf[20]; 20033965Sjdpstatic int add_newlines; 20133965Sjdpstatic char *saved_input; 20233965Sjdpstatic int saved_input_len; 20360484Sobrienstatic char input_buffer[32 * 1024]; 20433965Sjdpstatic const char *mri_state; 20533965Sjdpstatic char mri_last_ch; 20633965Sjdp 20733965Sjdp/* Data structure for saving the state of app across #include's. Note that 20833965Sjdp app is called asynchronously to the parsing of the .include's, so our 20933965Sjdp state at the time .include is interpreted is completely unrelated. 21033965Sjdp That's why we have to save it all. */ 21133965Sjdp 212130561Sobrienstruct app_save 213130561Sobrien{ 21477298Sobrien int state; 21577298Sobrien int old_state; 21677298Sobrien char * out_string; 21777298Sobrien char out_buf[sizeof (out_buf)]; 21877298Sobrien int add_newlines; 21977298Sobrien char * saved_input; 22077298Sobrien int saved_input_len; 22160484Sobrien#ifdef TC_M68K 22277298Sobrien int scrub_m68k_mri; 22360484Sobrien#endif 22477298Sobrien const char * mri_state; 22577298Sobrien char mri_last_ch; 22660484Sobrien#if defined TC_ARM && defined OBJ_ELF 22777298Sobrien const char * symver_state; 22860484Sobrien#endif 22977298Sobrien}; 23033965Sjdp 23133965Sjdpchar * 232130561Sobrienapp_push (void) 23333965Sjdp{ 23433965Sjdp register struct app_save *saved; 23533965Sjdp 23633965Sjdp saved = (struct app_save *) xmalloc (sizeof (*saved)); 23733965Sjdp saved->state = state; 23833965Sjdp saved->old_state = old_state; 23933965Sjdp saved->out_string = out_string; 24033965Sjdp memcpy (saved->out_buf, out_buf, sizeof (out_buf)); 24133965Sjdp saved->add_newlines = add_newlines; 24260484Sobrien if (saved_input == NULL) 24360484Sobrien saved->saved_input = NULL; 24460484Sobrien else 24560484Sobrien { 24660484Sobrien saved->saved_input = xmalloc (saved_input_len); 24760484Sobrien memcpy (saved->saved_input, saved_input, saved_input_len); 24860484Sobrien saved->saved_input_len = saved_input_len; 24960484Sobrien } 25060484Sobrien#ifdef TC_M68K 25133965Sjdp saved->scrub_m68k_mri = scrub_m68k_mri; 25260484Sobrien#endif 25333965Sjdp saved->mri_state = mri_state; 25433965Sjdp saved->mri_last_ch = mri_last_ch; 25560484Sobrien#if defined TC_ARM && defined OBJ_ELF 25660484Sobrien saved->symver_state = symver_state; 25760484Sobrien#endif 25833965Sjdp 25977298Sobrien /* do_scrub_begin() is not useful, just wastes time. */ 26033965Sjdp 26133965Sjdp state = 0; 26233965Sjdp saved_input = NULL; 26333965Sjdp 26433965Sjdp return (char *) saved; 26533965Sjdp} 26633965Sjdp 26777298Sobrienvoid 268130561Sobrienapp_pop (char *arg) 26933965Sjdp{ 27033965Sjdp register struct app_save *saved = (struct app_save *) arg; 27133965Sjdp 27277298Sobrien /* There is no do_scrub_end (). */ 27333965Sjdp state = saved->state; 27433965Sjdp old_state = saved->old_state; 27533965Sjdp out_string = saved->out_string; 27633965Sjdp memcpy (out_buf, saved->out_buf, sizeof (out_buf)); 27733965Sjdp add_newlines = saved->add_newlines; 27860484Sobrien if (saved->saved_input == NULL) 27960484Sobrien saved_input = NULL; 28060484Sobrien else 28160484Sobrien { 28260484Sobrien assert (saved->saved_input_len <= (int) (sizeof input_buffer)); 28360484Sobrien memcpy (input_buffer, saved->saved_input, saved->saved_input_len); 28460484Sobrien saved_input = input_buffer; 28560484Sobrien saved_input_len = saved->saved_input_len; 28660484Sobrien free (saved->saved_input); 28760484Sobrien } 28860484Sobrien#ifdef TC_M68K 28933965Sjdp scrub_m68k_mri = saved->scrub_m68k_mri; 29060484Sobrien#endif 29133965Sjdp mri_state = saved->mri_state; 29233965Sjdp mri_last_ch = saved->mri_last_ch; 29360484Sobrien#if defined TC_ARM && defined OBJ_ELF 29460484Sobrien symver_state = saved->symver_state; 29560484Sobrien#endif 29633965Sjdp 29733965Sjdp free (arg); 298130561Sobrien} 29933965Sjdp 30033965Sjdp/* @@ This assumes that \n &c are the same on host and target. This is not 30133965Sjdp necessarily true. */ 302130561Sobrien 30377298Sobrienstatic int 304130561Sobrienprocess_escape (int ch) 30533965Sjdp{ 30633965Sjdp switch (ch) 30733965Sjdp { 30833965Sjdp case 'b': 30933965Sjdp return '\b'; 31033965Sjdp case 'f': 31133965Sjdp return '\f'; 31233965Sjdp case 'n': 31333965Sjdp return '\n'; 31433965Sjdp case 'r': 31533965Sjdp return '\r'; 31633965Sjdp case 't': 31733965Sjdp return '\t'; 31833965Sjdp case '\'': 31933965Sjdp return '\''; 32033965Sjdp case '"': 32133965Sjdp return '\"'; 32233965Sjdp default: 32333965Sjdp return ch; 32433965Sjdp } 32533965Sjdp} 32633965Sjdp 32733965Sjdp/* This function is called to process input characters. The GET 32833965Sjdp parameter is used to retrieve more input characters. GET should 32933965Sjdp set its parameter to point to a buffer, and return the length of 33033965Sjdp the buffer; it should return 0 at end of file. The scrubbed output 33133965Sjdp characters are put into the buffer starting at TOSTART; the TOSTART 33233965Sjdp buffer is TOLEN bytes in length. The function returns the number 33333965Sjdp of scrubbed characters put into TOSTART. This will be TOLEN unless 33433965Sjdp end of file was seen. This function is arranged as a state 33533965Sjdp machine, and saves its state so that it may return at any point. 33633965Sjdp This is the way the old code used to work. */ 33733965Sjdp 33833965Sjdpint 339130561Sobriendo_scrub_chars (int (*get) (char *, int), char *tostart, int tolen) 34033965Sjdp{ 34133965Sjdp char *to = tostart; 34233965Sjdp char *toend = tostart + tolen; 34333965Sjdp char *from; 34433965Sjdp char *fromend; 34533965Sjdp int fromlen; 34633965Sjdp register int ch, ch2 = 0; 347218822Sdim /* Character that started the string we're working on. */ 348218822Sdim static char quotechar; 34933965Sjdp 35033965Sjdp /*State 0: beginning of normal line 35133965Sjdp 1: After first whitespace on line (flush more white) 35233965Sjdp 2: After first non-white (opcode) on line (keep 1white) 35333965Sjdp 3: after second white on line (into operands) (flush white) 354218822Sdim 4: after putting out a .linefile, put out digits 35533965Sjdp 5: parsing a string, then go to old-state 35633965Sjdp 6: putting out \ escape in a "d string. 357218822Sdim 7: no longer used 358218822Sdim 8: no longer used 35933965Sjdp 9: After seeing symbol char in state 3 (keep 1white after symchar) 36033965Sjdp 10: After seeing whitespace in state 9 (keep white before symchar) 36133965Sjdp 11: After seeing a symbol character in state 0 (eg a label definition) 36233965Sjdp -1: output string in out_string and go to the state in old_state 36333965Sjdp -2: flush text until a '*' '/' is seen, then go to state old_state 36438889Sjdp#ifdef TC_V850 365130561Sobrien 12: After seeing a dash, looking for a second dash as a start 366130561Sobrien of comment. 36738889Sjdp#endif 36860484Sobrien#ifdef DOUBLEBAR_PARALLEL 369130561Sobrien 13: After seeing a vertical bar, looking for a second 370130561Sobrien vertical bar as a parallel expression separator. 37138889Sjdp#endif 372130561Sobrien#ifdef TC_IA64 373130561Sobrien 14: After seeing a `(' at state 0, looking for a `)' as 374130561Sobrien predicate. 375130561Sobrien 15: After seeing a `(' at state 1, looking for a `)' as 376130561Sobrien predicate. 377130561Sobrien#endif 378218822Sdim#ifdef TC_Z80 379218822Sdim 16: After seeing an 'a' or an 'A' at the start of a symbol 380218822Sdim 17: After seeing an 'f' or an 'F' in state 16 381218822Sdim#endif 38233965Sjdp */ 38333965Sjdp 38433965Sjdp /* I added states 9 and 10 because the MIPS ECOFF assembler uses 38533965Sjdp constructs like ``.loc 1 20''. This was turning into ``.loc 38633965Sjdp 120''. States 9 and 10 ensure that a space is never dropped in 38789857Sobrien between characters which could appear in an identifier. Ian 38833965Sjdp Taylor, ian@cygnus.com. 38933965Sjdp 39033965Sjdp I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works 39133965Sjdp correctly on the PA (and any other target where colons are optional). 39238889Sjdp Jeff Law, law@cs.utah.edu. 39333965Sjdp 39438889Sjdp I added state 13 so that something like "cmp r1, r2 || trap #1" does not 39538889Sjdp get squashed into "cmp r1,r2||trap#1", with the all important space 39638889Sjdp between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */ 39738889Sjdp 39833965Sjdp /* This macro gets the next input character. */ 39933965Sjdp 40060484Sobrien#define GET() \ 40160484Sobrien (from < fromend \ 40260484Sobrien ? * (unsigned char *) (from++) \ 40360484Sobrien : (saved_input = NULL, \ 40460484Sobrien fromlen = (*get) (input_buffer, sizeof input_buffer), \ 40560484Sobrien from = input_buffer, \ 40660484Sobrien fromend = from + fromlen, \ 40760484Sobrien (fromlen == 0 \ 40860484Sobrien ? EOF \ 40960484Sobrien : * (unsigned char *) (from++)))) 41033965Sjdp 41133965Sjdp /* This macro pushes a character back on the input stream. */ 41233965Sjdp 41333965Sjdp#define UNGET(uch) (*--from = (uch)) 41433965Sjdp 41533965Sjdp /* This macro puts a character into the output buffer. If this 41633965Sjdp character fills the output buffer, this macro jumps to the label 41733965Sjdp TOFULL. We use this rather ugly approach because we need to 41833965Sjdp handle two different termination conditions: EOF on the input 41933965Sjdp stream, and a full output buffer. It would be simpler if we 42033965Sjdp always read in the entire input stream before processing it, but 42133965Sjdp I don't want to make such a significant change to the assembler's 42233965Sjdp memory usage. */ 42333965Sjdp 424104834Sobrien#define PUT(pch) \ 425104834Sobrien do \ 426104834Sobrien { \ 427104834Sobrien *to++ = (pch); \ 428104834Sobrien if (to >= toend) \ 429104834Sobrien goto tofull; \ 430104834Sobrien } \ 43133965Sjdp while (0) 43233965Sjdp 43333965Sjdp if (saved_input != NULL) 43433965Sjdp { 43533965Sjdp from = saved_input; 43633965Sjdp fromend = from + saved_input_len; 43733965Sjdp } 43833965Sjdp else 43933965Sjdp { 44060484Sobrien fromlen = (*get) (input_buffer, sizeof input_buffer); 44133965Sjdp if (fromlen == 0) 44233965Sjdp return 0; 44360484Sobrien from = input_buffer; 44433965Sjdp fromend = from + fromlen; 44533965Sjdp } 44633965Sjdp 44733965Sjdp while (1) 44833965Sjdp { 44933965Sjdp /* The cases in this switch end with continue, in order to 450130561Sobrien branch back to the top of this while loop and generate the 451130561Sobrien next output character in the appropriate state. */ 45233965Sjdp switch (state) 45333965Sjdp { 45433965Sjdp case -1: 45533965Sjdp ch = *out_string++; 45633965Sjdp if (*out_string == '\0') 45733965Sjdp { 45833965Sjdp state = old_state; 45933965Sjdp old_state = 3; 46033965Sjdp } 46133965Sjdp PUT (ch); 46233965Sjdp continue; 46333965Sjdp 46433965Sjdp case -2: 46533965Sjdp for (;;) 46633965Sjdp { 46733965Sjdp do 46833965Sjdp { 46933965Sjdp ch = GET (); 47033965Sjdp 47133965Sjdp if (ch == EOF) 47233965Sjdp { 47360484Sobrien as_warn (_("end of file in comment")); 47433965Sjdp goto fromeof; 47533965Sjdp } 47633965Sjdp 47733965Sjdp if (ch == '\n') 47833965Sjdp PUT ('\n'); 47933965Sjdp } 48033965Sjdp while (ch != '*'); 48133965Sjdp 48233965Sjdp while ((ch = GET ()) == '*') 48333965Sjdp ; 48433965Sjdp 48533965Sjdp if (ch == EOF) 48633965Sjdp { 48760484Sobrien as_warn (_("end of file in comment")); 48833965Sjdp goto fromeof; 48933965Sjdp } 49033965Sjdp 49133965Sjdp if (ch == '/') 49233965Sjdp break; 49333965Sjdp 49433965Sjdp UNGET (ch); 49533965Sjdp } 49633965Sjdp 49733965Sjdp state = old_state; 49833965Sjdp UNGET (' '); 49933965Sjdp continue; 50033965Sjdp 50133965Sjdp case 4: 50233965Sjdp ch = GET (); 50333965Sjdp if (ch == EOF) 50433965Sjdp goto fromeof; 50533965Sjdp else if (ch >= '0' && ch <= '9') 50633965Sjdp PUT (ch); 50733965Sjdp else 50833965Sjdp { 50933965Sjdp while (ch != EOF && IS_WHITESPACE (ch)) 51033965Sjdp ch = GET (); 51133965Sjdp if (ch == '"') 51233965Sjdp { 513218822Sdim quotechar = ch; 514218822Sdim state = 5; 515218822Sdim old_state = 3; 516218822Sdim PUT (ch); 51733965Sjdp } 51833965Sjdp else 51933965Sjdp { 52033965Sjdp while (ch != EOF && ch != '\n') 52133965Sjdp ch = GET (); 52233965Sjdp state = 0; 52333965Sjdp PUT (ch); 52433965Sjdp } 52533965Sjdp } 52633965Sjdp continue; 52733965Sjdp 52833965Sjdp case 5: 52933965Sjdp /* We are going to copy everything up to a quote character, 530130561Sobrien with special handling for a backslash. We try to 531130561Sobrien optimize the copying in the simple case without using the 532130561Sobrien GET and PUT macros. */ 53333965Sjdp { 53433965Sjdp char *s; 53533965Sjdp int len; 53633965Sjdp 53733965Sjdp for (s = from; s < fromend; s++) 53833965Sjdp { 53933965Sjdp ch = *s; 54033965Sjdp if (ch == '\\' 541218822Sdim || ch == quotechar 54233965Sjdp || ch == '\n') 54333965Sjdp break; 54433965Sjdp } 54533965Sjdp len = s - from; 54633965Sjdp if (len > toend - to) 54733965Sjdp len = toend - to; 54833965Sjdp if (len > 0) 54933965Sjdp { 55033965Sjdp memcpy (to, from, len); 55133965Sjdp to += len; 55233965Sjdp from += len; 553218822Sdim if (to >= toend) 554218822Sdim goto tofull; 55533965Sjdp } 55633965Sjdp } 55733965Sjdp 55833965Sjdp ch = GET (); 55933965Sjdp if (ch == EOF) 56033965Sjdp { 561218822Sdim as_warn (_("end of file in string; '%c' inserted"), quotechar); 56233965Sjdp state = old_state; 56333965Sjdp UNGET ('\n'); 564218822Sdim PUT (quotechar); 56533965Sjdp } 566218822Sdim else if (ch == quotechar) 56733965Sjdp { 56833965Sjdp state = old_state; 56933965Sjdp PUT (ch); 57033965Sjdp } 57133965Sjdp#ifndef NO_STRING_ESCAPES 57233965Sjdp else if (ch == '\\') 57333965Sjdp { 57433965Sjdp state = 6; 57533965Sjdp PUT (ch); 57633965Sjdp } 57733965Sjdp#endif 57833965Sjdp else if (scrub_m68k_mri && ch == '\n') 57933965Sjdp { 58033965Sjdp /* Just quietly terminate the string. This permits lines like 581130561Sobrien bne label loop if we haven't reach end yet. */ 58233965Sjdp state = old_state; 58333965Sjdp UNGET (ch); 58433965Sjdp PUT ('\''); 58533965Sjdp } 58633965Sjdp else 58733965Sjdp { 58833965Sjdp PUT (ch); 58933965Sjdp } 59033965Sjdp continue; 59133965Sjdp 59233965Sjdp case 6: 59333965Sjdp state = 5; 59433965Sjdp ch = GET (); 59533965Sjdp switch (ch) 59633965Sjdp { 59733965Sjdp /* Handle strings broken across lines, by turning '\n' into 59833965Sjdp '\\' and 'n'. */ 59933965Sjdp case '\n': 60033965Sjdp UNGET ('n'); 60133965Sjdp add_newlines++; 60233965Sjdp PUT ('\\'); 60333965Sjdp continue; 60433965Sjdp 605130561Sobrien case EOF: 606218822Sdim as_warn (_("end of file in string; '%c' inserted"), quotechar); 607218822Sdim PUT (quotechar); 608130561Sobrien continue; 609130561Sobrien 61033965Sjdp case '"': 61133965Sjdp case '\\': 61233965Sjdp case 'b': 61333965Sjdp case 'f': 61433965Sjdp case 'n': 61533965Sjdp case 'r': 61633965Sjdp case 't': 61733965Sjdp case 'v': 61833965Sjdp case 'x': 61933965Sjdp case 'X': 62033965Sjdp case '0': 62133965Sjdp case '1': 62233965Sjdp case '2': 62333965Sjdp case '3': 62433965Sjdp case '4': 62533965Sjdp case '5': 62633965Sjdp case '6': 62733965Sjdp case '7': 62833965Sjdp break; 629130561Sobrien 63033965Sjdp default: 631130561Sobrien#ifdef ONLY_STANDARD_ESCAPES 63289857Sobrien as_warn (_("unknown escape '\\%c' in string; ignored"), ch); 633130561Sobrien#endif 63433965Sjdp break; 63533965Sjdp } 63633965Sjdp PUT (ch); 63733965Sjdp continue; 63833965Sjdp 639130561Sobrien#ifdef DOUBLEBAR_PARALLEL 640130561Sobrien case 13: 641130561Sobrien ch = GET (); 642130561Sobrien if (ch != '|') 643130561Sobrien abort (); 644130561Sobrien 645130561Sobrien /* Reset back to state 1 and pretend that we are parsing a 646130561Sobrien line from just after the first white space. */ 647130561Sobrien state = 1; 648130561Sobrien PUT ('|'); 649130561Sobrien continue; 650130561Sobrien#endif 651218822Sdim#ifdef TC_Z80 652218822Sdim case 16: 653218822Sdim /* We have seen an 'a' at the start of a symbol, look for an 'f'. */ 654218822Sdim ch = GET (); 655218822Sdim if (ch == 'f' || ch == 'F') 656218822Sdim { 657218822Sdim state = 17; 658218822Sdim PUT (ch); 659218822Sdim } 660218822Sdim else 661218822Sdim { 662218822Sdim state = 9; 663218822Sdim break; 664218822Sdim } 665218822Sdim case 17: 666218822Sdim /* We have seen "af" at the start of a symbol, 667218822Sdim a ' here is a part of that symbol. */ 668218822Sdim ch = GET (); 669218822Sdim state = 9; 670218822Sdim if (ch == '\'') 671218822Sdim /* Change to avoid warning about unclosed string. */ 672218822Sdim PUT ('`'); 673218822Sdim else 674218822Sdim UNGET (ch); 675218822Sdim break; 676218822Sdim#endif 67733965Sjdp } 67833965Sjdp 679130561Sobrien /* OK, we are somewhere in states 0 through 4 or 9 through 11. */ 68033965Sjdp 68133965Sjdp /* flushchar: */ 68233965Sjdp ch = GET (); 68333965Sjdp 684130561Sobrien#ifdef TC_IA64 685130561Sobrien if (ch == '(' && (state == 0 || state == 1)) 686130561Sobrien { 687130561Sobrien state += 14; 688130561Sobrien PUT (ch); 689130561Sobrien continue; 690130561Sobrien } 691130561Sobrien else if (state == 14 || state == 15) 692130561Sobrien { 693130561Sobrien if (ch == ')') 694130561Sobrien { 695130561Sobrien state -= 14; 696130561Sobrien PUT (ch); 697130561Sobrien ch = GET (); 698130561Sobrien } 699130561Sobrien else 700130561Sobrien { 701130561Sobrien PUT (ch); 702130561Sobrien continue; 703130561Sobrien } 704130561Sobrien } 705130561Sobrien#endif 706130561Sobrien 70733965Sjdp recycle: 70833965Sjdp 70960484Sobrien#if defined TC_ARM && defined OBJ_ELF 71060484Sobrien /* We need to watch out for .symver directives. See the comment later 71160484Sobrien in this function. */ 71260484Sobrien if (symver_state == NULL) 71360484Sobrien { 71460484Sobrien if ((state == 0 || state == 1) && ch == symver_pseudo[0]) 71560484Sobrien symver_state = symver_pseudo + 1; 71660484Sobrien } 71760484Sobrien else 71860484Sobrien { 71960484Sobrien /* We advance to the next state if we find the right 72060484Sobrien character. */ 72160484Sobrien if (ch != '\0' && (*symver_state == ch)) 72260484Sobrien ++symver_state; 72360484Sobrien else if (*symver_state != '\0') 72460484Sobrien /* We did not get the expected character, or we didn't 72560484Sobrien get a valid terminating character after seeing the 72660484Sobrien entire pseudo-op, so we must go back to the beginning. */ 72760484Sobrien symver_state = NULL; 72860484Sobrien else 72960484Sobrien { 73060484Sobrien /* We've read the entire pseudo-op. If this is the end 73160484Sobrien of the line, go back to the beginning. */ 73260484Sobrien if (IS_NEWLINE (ch)) 73360484Sobrien symver_state = NULL; 73460484Sobrien } 73560484Sobrien } 73660484Sobrien#endif /* TC_ARM && OBJ_ELF */ 73760484Sobrien 73833965Sjdp#ifdef TC_M68K 73933965Sjdp /* We want to have pseudo-ops which control whether we are in 740130561Sobrien MRI mode or not. Unfortunately, since m68k MRI mode affects 741130561Sobrien the scrubber, that means that we need a special purpose 742130561Sobrien recognizer here. */ 74333965Sjdp if (mri_state == NULL) 74433965Sjdp { 74533965Sjdp if ((state == 0 || state == 1) 74633965Sjdp && ch == mri_pseudo[0]) 74733965Sjdp mri_state = mri_pseudo + 1; 74833965Sjdp } 74933965Sjdp else 75033965Sjdp { 75133965Sjdp /* We advance to the next state if we find the right 75233965Sjdp character, or if we need a space character and we get any 75333965Sjdp whitespace character, or if we need a '0' and we get a 75433965Sjdp '1' (this is so that we only need one state to handle 75533965Sjdp ``.mri 0'' and ``.mri 1''). */ 75633965Sjdp if (ch != '\0' 75733965Sjdp && (*mri_state == ch 75833965Sjdp || (*mri_state == ' ' 75933965Sjdp && lex[ch] == LEX_IS_WHITESPACE) 76033965Sjdp || (*mri_state == '0' 76133965Sjdp && ch == '1'))) 76233965Sjdp { 76333965Sjdp mri_last_ch = ch; 76433965Sjdp ++mri_state; 76533965Sjdp } 76633965Sjdp else if (*mri_state != '\0' 76733965Sjdp || (lex[ch] != LEX_IS_WHITESPACE 76833965Sjdp && lex[ch] != LEX_IS_NEWLINE)) 76933965Sjdp { 77033965Sjdp /* We did not get the expected character, or we didn't 77133965Sjdp get a valid terminating character after seeing the 77233965Sjdp entire pseudo-op, so we must go back to the 77333965Sjdp beginning. */ 77433965Sjdp mri_state = NULL; 77533965Sjdp } 77633965Sjdp else 77733965Sjdp { 77833965Sjdp /* We've read the entire pseudo-op. mips_last_ch is 779130561Sobrien either '0' or '1' indicating whether to enter or 780130561Sobrien leave MRI mode. */ 78133965Sjdp do_scrub_begin (mri_last_ch == '1'); 78238889Sjdp mri_state = NULL; 78333965Sjdp 78433965Sjdp /* We continue handling the character as usual. The 785130561Sobrien main gas reader must also handle the .mri pseudo-op 786130561Sobrien to control expression parsing and the like. */ 78733965Sjdp } 78833965Sjdp } 78933965Sjdp#endif 79033965Sjdp 79133965Sjdp if (ch == EOF) 79233965Sjdp { 79333965Sjdp if (state != 0) 79433965Sjdp { 79560484Sobrien as_warn (_("end of file not at end of a line; newline inserted")); 79633965Sjdp state = 0; 79733965Sjdp PUT ('\n'); 79833965Sjdp } 79933965Sjdp goto fromeof; 80033965Sjdp } 80133965Sjdp 80233965Sjdp switch (lex[ch]) 80333965Sjdp { 80433965Sjdp case LEX_IS_WHITESPACE: 80533965Sjdp do 80633965Sjdp { 80733965Sjdp ch = GET (); 80833965Sjdp } 80933965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 81033965Sjdp if (ch == EOF) 81133965Sjdp goto fromeof; 81233965Sjdp 81333965Sjdp if (state == 0) 81433965Sjdp { 81533965Sjdp /* Preserve a single whitespace character at the 81633965Sjdp beginning of a line. */ 81733965Sjdp state = 1; 81833965Sjdp UNGET (ch); 81933965Sjdp PUT (' '); 82033965Sjdp break; 82133965Sjdp } 82233965Sjdp 82360484Sobrien#ifdef KEEP_WHITE_AROUND_COLON 82477298Sobrien if (lex[ch] == LEX_IS_COLON) 82577298Sobrien { 82677298Sobrien /* Only keep this white if there's no white *after* the 827130561Sobrien colon. */ 82877298Sobrien ch2 = GET (); 82977298Sobrien UNGET (ch2); 83077298Sobrien if (!IS_WHITESPACE (ch2)) 83177298Sobrien { 83277298Sobrien state = 9; 83377298Sobrien UNGET (ch); 83477298Sobrien PUT (' '); 83577298Sobrien break; 83677298Sobrien } 83777298Sobrien } 83860484Sobrien#endif 83933965Sjdp if (IS_COMMENT (ch) 84033965Sjdp || ch == '/' 84189857Sobrien || IS_LINE_SEPARATOR (ch) 84289857Sobrien || IS_PARALLEL_SEPARATOR (ch)) 84333965Sjdp { 84433965Sjdp if (scrub_m68k_mri) 84533965Sjdp { 84633965Sjdp /* In MRI mode, we keep these spaces. */ 84733965Sjdp UNGET (ch); 84833965Sjdp PUT (' '); 84933965Sjdp break; 85033965Sjdp } 85133965Sjdp goto recycle; 85233965Sjdp } 85333965Sjdp 85433965Sjdp /* If we're in state 2 or 11, we've seen a non-white 85533965Sjdp character followed by whitespace. If the next character 85633965Sjdp is ':', this is whitespace after a label name which we 85733965Sjdp normally must ignore. In MRI mode, though, spaces are 85833965Sjdp not permitted between the label and the colon. */ 85933965Sjdp if ((state == 2 || state == 11) 86033965Sjdp && lex[ch] == LEX_IS_COLON 86133965Sjdp && ! scrub_m68k_mri) 86233965Sjdp { 86333965Sjdp state = 1; 86433965Sjdp PUT (ch); 86533965Sjdp break; 86633965Sjdp } 86733965Sjdp 86833965Sjdp switch (state) 86933965Sjdp { 87033965Sjdp case 1: 87133965Sjdp /* We can arrive here if we leave a leading whitespace 87233965Sjdp character at the beginning of a line. */ 87333965Sjdp goto recycle; 87433965Sjdp case 2: 87533965Sjdp state = 3; 87633965Sjdp if (to + 1 < toend) 87733965Sjdp { 87833965Sjdp /* Optimize common case by skipping UNGET/GET. */ 87933965Sjdp PUT (' '); /* Sp after opco */ 88033965Sjdp goto recycle; 88133965Sjdp } 88233965Sjdp UNGET (ch); 88333965Sjdp PUT (' '); 88433965Sjdp break; 88533965Sjdp case 3: 88633965Sjdp if (scrub_m68k_mri) 88733965Sjdp { 88833965Sjdp /* In MRI mode, we keep these spaces. */ 88933965Sjdp UNGET (ch); 89033965Sjdp PUT (' '); 89133965Sjdp break; 89233965Sjdp } 89333965Sjdp goto recycle; /* Sp in operands */ 89433965Sjdp case 9: 89533965Sjdp case 10: 89633965Sjdp if (scrub_m68k_mri) 89733965Sjdp { 89833965Sjdp /* In MRI mode, we keep these spaces. */ 89933965Sjdp state = 3; 90033965Sjdp UNGET (ch); 90133965Sjdp PUT (' '); 90233965Sjdp break; 90333965Sjdp } 90433965Sjdp state = 10; /* Sp after symbol char */ 90533965Sjdp goto recycle; 90633965Sjdp case 11: 90760484Sobrien if (LABELS_WITHOUT_COLONS || flag_m68k_mri) 90833965Sjdp state = 1; 90933965Sjdp else 91033965Sjdp { 91133965Sjdp /* We know that ch is not ':', since we tested that 912130561Sobrien case above. Therefore this is not a label, so it 913130561Sobrien must be the opcode, and we've just seen the 914130561Sobrien whitespace after it. */ 91533965Sjdp state = 3; 91633965Sjdp } 91733965Sjdp UNGET (ch); 91833965Sjdp PUT (' '); /* Sp after label definition. */ 91933965Sjdp break; 92033965Sjdp default: 92133965Sjdp BAD_CASE (state); 92233965Sjdp } 92333965Sjdp break; 92433965Sjdp 92533965Sjdp case LEX_IS_TWOCHAR_COMMENT_1ST: 92633965Sjdp ch2 = GET (); 92733965Sjdp if (ch2 == '*') 92833965Sjdp { 92933965Sjdp for (;;) 93033965Sjdp { 93133965Sjdp do 93233965Sjdp { 93333965Sjdp ch2 = GET (); 93433965Sjdp if (ch2 != EOF && IS_NEWLINE (ch2)) 93533965Sjdp add_newlines++; 93633965Sjdp } 93733965Sjdp while (ch2 != EOF && ch2 != '*'); 93833965Sjdp 93933965Sjdp while (ch2 == '*') 94033965Sjdp ch2 = GET (); 94133965Sjdp 94233965Sjdp if (ch2 == EOF || ch2 == '/') 94333965Sjdp break; 94433965Sjdp 94533965Sjdp /* This UNGET will ensure that we count newlines 946130561Sobrien correctly. */ 94733965Sjdp UNGET (ch2); 94833965Sjdp } 94933965Sjdp 95033965Sjdp if (ch2 == EOF) 95160484Sobrien as_warn (_("end of file in multiline comment")); 95233965Sjdp 95333965Sjdp ch = ' '; 95433965Sjdp goto recycle; 95533965Sjdp } 95677298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS 95777298Sobrien else if (ch2 == '/') 95877298Sobrien { 95977298Sobrien do 96077298Sobrien { 96177298Sobrien ch = GET (); 96277298Sobrien } 96377298Sobrien while (ch != EOF && !IS_NEWLINE (ch)); 96477298Sobrien if (ch == EOF) 96577298Sobrien as_warn ("end of file in comment; newline inserted"); 96677298Sobrien state = 0; 96777298Sobrien PUT ('\n'); 96877298Sobrien break; 96977298Sobrien } 97077298Sobrien#endif 97133965Sjdp else 97233965Sjdp { 97333965Sjdp if (ch2 != EOF) 97433965Sjdp UNGET (ch2); 97533965Sjdp if (state == 9 || state == 10) 97633965Sjdp state = 3; 97733965Sjdp PUT (ch); 97833965Sjdp } 97933965Sjdp break; 98033965Sjdp 98133965Sjdp case LEX_IS_STRINGQUOTE: 982218822Sdim quotechar = ch; 98333965Sjdp if (state == 10) 98433965Sjdp { 985130561Sobrien /* Preserve the whitespace in foo "bar". */ 98633965Sjdp UNGET (ch); 98733965Sjdp state = 3; 98833965Sjdp PUT (' '); 98933965Sjdp 99033965Sjdp /* PUT didn't jump out. We could just break, but we 991130561Sobrien know what will happen, so optimize a bit. */ 99233965Sjdp ch = GET (); 99333965Sjdp old_state = 3; 99433965Sjdp } 99533965Sjdp else if (state == 9) 99633965Sjdp old_state = 3; 99733965Sjdp else 99833965Sjdp old_state = state; 99933965Sjdp state = 5; 100033965Sjdp PUT (ch); 100133965Sjdp break; 100233965Sjdp 100333965Sjdp#ifndef IEEE_STYLE 100433965Sjdp case LEX_IS_ONECHAR_QUOTE: 100533965Sjdp if (state == 10) 100633965Sjdp { 1007130561Sobrien /* Preserve the whitespace in foo 'b'. */ 100833965Sjdp UNGET (ch); 100933965Sjdp state = 3; 101033965Sjdp PUT (' '); 101133965Sjdp break; 101233965Sjdp } 101333965Sjdp ch = GET (); 101433965Sjdp if (ch == EOF) 101533965Sjdp { 101660484Sobrien as_warn (_("end of file after a one-character quote; \\0 inserted")); 101733965Sjdp ch = 0; 101833965Sjdp } 101933965Sjdp if (ch == '\\') 102033965Sjdp { 102133965Sjdp ch = GET (); 102233965Sjdp if (ch == EOF) 102333965Sjdp { 102460484Sobrien as_warn (_("end of file in escape character")); 102533965Sjdp ch = '\\'; 102633965Sjdp } 102733965Sjdp else 102833965Sjdp ch = process_escape (ch); 102933965Sjdp } 103033965Sjdp sprintf (out_buf, "%d", (int) (unsigned char) ch); 103133965Sjdp 103233965Sjdp /* None of these 'x constants for us. We want 'x'. */ 103333965Sjdp if ((ch = GET ()) != '\'') 103433965Sjdp { 103533965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE 103689857Sobrien as_warn (_("missing close quote; (assumed)")); 103733965Sjdp#else 103833965Sjdp if (ch != EOF) 103933965Sjdp UNGET (ch); 104033965Sjdp#endif 104133965Sjdp } 104233965Sjdp if (strlen (out_buf) == 1) 104333965Sjdp { 104433965Sjdp PUT (out_buf[0]); 104533965Sjdp break; 104633965Sjdp } 104733965Sjdp if (state == 9) 104833965Sjdp old_state = 3; 104933965Sjdp else 105033965Sjdp old_state = state; 105133965Sjdp state = -1; 105233965Sjdp out_string = out_buf; 105333965Sjdp PUT (*out_string++); 105433965Sjdp break; 105533965Sjdp#endif 105633965Sjdp 105733965Sjdp case LEX_IS_COLON: 105860484Sobrien#ifdef KEEP_WHITE_AROUND_COLON 105977298Sobrien state = 9; 106060484Sobrien#else 106133965Sjdp if (state == 9 || state == 10) 106233965Sjdp state = 3; 106333965Sjdp else if (state != 3) 106433965Sjdp state = 1; 106560484Sobrien#endif 106633965Sjdp PUT (ch); 106733965Sjdp break; 106833965Sjdp 106933965Sjdp case LEX_IS_NEWLINE: 107033965Sjdp /* Roll out a bunch of newlines from inside comments, etc. */ 107133965Sjdp if (add_newlines) 107233965Sjdp { 107333965Sjdp --add_newlines; 107433965Sjdp UNGET (ch); 107533965Sjdp } 107677298Sobrien /* Fall through. */ 107733965Sjdp 107833965Sjdp case LEX_IS_LINE_SEPARATOR: 107933965Sjdp state = 0; 108033965Sjdp PUT (ch); 108133965Sjdp break; 108233965Sjdp 108389857Sobrien case LEX_IS_PARALLEL_SEPARATOR: 108489857Sobrien state = 1; 108589857Sobrien PUT (ch); 108689857Sobrien break; 108789857Sobrien 108838889Sjdp#ifdef TC_V850 108938889Sjdp case LEX_IS_DOUBLEDASH_1ST: 109077298Sobrien ch2 = GET (); 109138889Sjdp if (ch2 != '-') 109238889Sjdp { 109338889Sjdp UNGET (ch2); 109438889Sjdp goto de_fault; 109538889Sjdp } 109677298Sobrien /* Read and skip to end of line. */ 109738889Sjdp do 109838889Sjdp { 109938889Sjdp ch = GET (); 110038889Sjdp } 110138889Sjdp while (ch != EOF && ch != '\n'); 1102130561Sobrien 110338889Sjdp if (ch == EOF) 1104130561Sobrien as_warn (_("end of file in comment; newline inserted")); 1105130561Sobrien 110638889Sjdp state = 0; 110738889Sjdp PUT ('\n'); 110838889Sjdp break; 110977298Sobrien#endif 111060484Sobrien#ifdef DOUBLEBAR_PARALLEL 111138889Sjdp case LEX_IS_DOUBLEBAR_1ST: 111277298Sobrien ch2 = GET (); 1113130561Sobrien UNGET (ch2); 111438889Sjdp if (ch2 != '|') 1115130561Sobrien goto de_fault; 1116130561Sobrien 1117130561Sobrien /* Handle '||' in two states as invoking PUT twice might 1118130561Sobrien result in the first one jumping out of this loop. We'd 1119130561Sobrien then lose track of the state and one '|' char. */ 1120130561Sobrien state = 13; 112138889Sjdp PUT ('|'); 112238889Sjdp break; 112377298Sobrien#endif 112433965Sjdp case LEX_IS_LINE_COMMENT_START: 112533965Sjdp /* FIXME-someday: The two character comment stuff was badly 112633965Sjdp thought out. On i386, we want '/' as line comment start 112733965Sjdp AND we want C style comments. hence this hack. The 112833965Sjdp whole lexical process should be reworked. xoxorich. */ 112933965Sjdp if (ch == '/') 113033965Sjdp { 113133965Sjdp ch2 = GET (); 113233965Sjdp if (ch2 == '*') 113333965Sjdp { 113433965Sjdp old_state = 3; 113533965Sjdp state = -2; 113633965Sjdp break; 113733965Sjdp } 113833965Sjdp else 113933965Sjdp { 114033965Sjdp UNGET (ch2); 114133965Sjdp } 1142130561Sobrien } 114333965Sjdp 114433965Sjdp if (state == 0 || state == 1) /* Only comment at start of line. */ 114533965Sjdp { 114633965Sjdp int startch; 114733965Sjdp 114833965Sjdp startch = ch; 114933965Sjdp 115033965Sjdp do 115133965Sjdp { 115233965Sjdp ch = GET (); 115333965Sjdp } 115433965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 1155130561Sobrien 115633965Sjdp if (ch == EOF) 115733965Sjdp { 115860484Sobrien as_warn (_("end of file in comment; newline inserted")); 115933965Sjdp PUT ('\n'); 116033965Sjdp break; 116133965Sjdp } 1162130561Sobrien 116333965Sjdp if (ch < '0' || ch > '9' || state != 0 || startch != '#') 116433965Sjdp { 116533965Sjdp /* Not a cpp line. */ 116633965Sjdp while (ch != EOF && !IS_NEWLINE (ch)) 116733965Sjdp ch = GET (); 116833965Sjdp if (ch == EOF) 116989857Sobrien as_warn (_("end of file in comment; newline inserted")); 117033965Sjdp state = 0; 117133965Sjdp PUT ('\n'); 117233965Sjdp break; 117333965Sjdp } 117477298Sobrien /* Looks like `# 123 "filename"' from cpp. */ 117533965Sjdp UNGET (ch); 117633965Sjdp old_state = 4; 117733965Sjdp state = -1; 117833965Sjdp if (scrub_m68k_mri) 1179218822Sdim out_string = "\tlinefile "; 118033965Sjdp else 1181218822Sdim out_string = "\t.linefile "; 118233965Sjdp PUT (*out_string++); 118333965Sjdp break; 118433965Sjdp } 118533965Sjdp 118638889Sjdp#ifdef TC_D10V 118738889Sjdp /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true. 118838889Sjdp Trap is the only short insn that has a first operand that is 118938889Sjdp neither register nor label. 119038889Sjdp We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 . 119177298Sobrien We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is 119277298Sobrien already LEX_IS_LINE_COMMENT_START. However, it is the 119377298Sobrien only character in line_comment_chars for d10v, hence we 119477298Sobrien can recognize it as such. */ 119538889Sjdp /* An alternative approach would be to reset the state to 1 when 119638889Sjdp we see '||', '<'- or '->', but that seems to be overkill. */ 119777298Sobrien if (state == 10) 119877298Sobrien PUT (' '); 119938889Sjdp#endif 120033965Sjdp /* We have a line comment character which is not at the 120133965Sjdp start of a line. If this is also a normal comment 120233965Sjdp character, fall through. Otherwise treat it as a default 120333965Sjdp character. */ 120433965Sjdp if (strchr (tc_comment_chars, ch) == NULL 120533965Sjdp && (! scrub_m68k_mri 120633965Sjdp || (ch != '!' && ch != '*'))) 120733965Sjdp goto de_fault; 120833965Sjdp if (scrub_m68k_mri 120933965Sjdp && (ch == '!' || ch == '*' || ch == '#') 121033965Sjdp && state != 1 121133965Sjdp && state != 10) 121233965Sjdp goto de_fault; 121333965Sjdp /* Fall through. */ 121433965Sjdp case LEX_IS_COMMENT_START: 121560484Sobrien#if defined TC_ARM && defined OBJ_ELF 121660484Sobrien /* On the ARM, `@' is the comment character. 121760484Sobrien Unfortunately this is also a special character in ELF .symver 121877298Sobrien directives (and .type, though we deal with those another way). 121977298Sobrien So we check if this line is such a directive, and treat 122077298Sobrien the character as default if so. This is a hack. */ 122160484Sobrien if ((symver_state != NULL) && (*symver_state == 0)) 122260484Sobrien goto de_fault; 122360484Sobrien#endif 1224218822Sdim 1225218822Sdim#ifdef TC_ARM 1226218822Sdim /* For the ARM, care is needed not to damage occurrences of \@ 1227218822Sdim by stripping the @ onwards. Yuck. */ 1228218822Sdim if (to > tostart && *(to - 1) == '\\') 1229218822Sdim /* Do not treat the @ as a start-of-comment. */ 1230218822Sdim goto de_fault; 1231218822Sdim#endif 1232218822Sdim 123377298Sobrien#ifdef WARN_COMMENTS 123477298Sobrien if (!found_comment) 123577298Sobrien as_where (&found_comment_file, &found_comment); 123677298Sobrien#endif 123733965Sjdp do 123833965Sjdp { 123933965Sjdp ch = GET (); 124033965Sjdp } 124133965Sjdp while (ch != EOF && !IS_NEWLINE (ch)); 124233965Sjdp if (ch == EOF) 124360484Sobrien as_warn (_("end of file in comment; newline inserted")); 124433965Sjdp state = 0; 124533965Sjdp PUT ('\n'); 124633965Sjdp break; 124733965Sjdp 124833965Sjdp case LEX_IS_SYMBOL_COMPONENT: 124933965Sjdp if (state == 10) 125033965Sjdp { 125133965Sjdp /* This is a symbol character following another symbol 125233965Sjdp character, with whitespace in between. We skipped 125333965Sjdp the whitespace earlier, so output it now. */ 125433965Sjdp UNGET (ch); 125533965Sjdp state = 3; 125633965Sjdp PUT (' '); 125733965Sjdp break; 125833965Sjdp } 125933965Sjdp 1260218822Sdim#ifdef TC_Z80 1261218822Sdim /* "af'" is a symbol containing '\''. */ 1262218822Sdim if (state == 3 && (ch == 'a' || ch == 'A')) 1263218822Sdim { 1264218822Sdim state = 16; 1265218822Sdim PUT (ch); 1266218822Sdim ch = GET (); 1267218822Sdim if (ch == 'f' || ch == 'F') 1268218822Sdim { 1269218822Sdim state = 17; 1270218822Sdim PUT (ch); 1271218822Sdim break; 1272218822Sdim } 1273218822Sdim else 1274218822Sdim { 1275218822Sdim state = 9; 1276218822Sdim if (!IS_SYMBOL_COMPONENT (ch)) 1277218822Sdim { 1278218822Sdim UNGET (ch); 1279218822Sdim break; 1280218822Sdim } 1281218822Sdim } 1282218822Sdim } 1283218822Sdim#endif 128433965Sjdp if (state == 3) 128533965Sjdp state = 9; 128633965Sjdp 128733965Sjdp /* This is a common case. Quickly copy CH and all the 1288130561Sobrien following symbol component or normal characters. */ 128960484Sobrien if (to + 1 < toend 129060484Sobrien && mri_state == NULL 129160484Sobrien#if defined TC_ARM && defined OBJ_ELF 129260484Sobrien && symver_state == NULL 129360484Sobrien#endif 129460484Sobrien ) 129533965Sjdp { 129633965Sjdp char *s; 129733965Sjdp int len; 129833965Sjdp 129933965Sjdp for (s = from; s < fromend; s++) 130033965Sjdp { 130133965Sjdp int type; 130233965Sjdp 130377298Sobrien ch2 = *(unsigned char *) s; 130433965Sjdp type = lex[ch2]; 130533965Sjdp if (type != 0 130633965Sjdp && type != LEX_IS_SYMBOL_COMPONENT) 130733965Sjdp break; 130833965Sjdp } 1309130561Sobrien 131033965Sjdp if (s > from) 1311130561Sobrien /* Handle the last character normally, for 1312130561Sobrien simplicity. */ 1313130561Sobrien --s; 1314130561Sobrien 131533965Sjdp len = s - from; 1316130561Sobrien 131733965Sjdp if (len > (toend - to) - 1) 131833965Sjdp len = (toend - to) - 1; 1319130561Sobrien 132033965Sjdp if (len > 0) 132133965Sjdp { 132233965Sjdp PUT (ch); 1323218822Sdim memcpy (to, from, len); 1324218822Sdim to += len; 1325218822Sdim from += len; 1326218822Sdim if (to >= toend) 1327218822Sdim goto tofull; 132833965Sjdp ch = GET (); 132933965Sjdp } 133033965Sjdp } 133133965Sjdp 133233965Sjdp /* Fall through. */ 133333965Sjdp default: 133433965Sjdp de_fault: 133533965Sjdp /* Some relatively `normal' character. */ 133633965Sjdp if (state == 0) 133733965Sjdp { 1338130561Sobrien state = 11; /* Now seeing label definition. */ 133933965Sjdp } 134033965Sjdp else if (state == 1) 134133965Sjdp { 1342130561Sobrien state = 2; /* Ditto. */ 134333965Sjdp } 134433965Sjdp else if (state == 9) 134533965Sjdp { 1346130561Sobrien if (!IS_SYMBOL_COMPONENT (ch)) 134733965Sjdp state = 3; 134833965Sjdp } 134933965Sjdp else if (state == 10) 135033965Sjdp { 135160484Sobrien if (ch == '\\') 135260484Sobrien { 135360484Sobrien /* Special handling for backslash: a backslash may 135460484Sobrien be the beginning of a formal parameter (of a 135560484Sobrien macro) following another symbol character, with 135660484Sobrien whitespace in between. If that is the case, we 135760484Sobrien output a space before the parameter. Strictly 135860484Sobrien speaking, correct handling depends upon what the 135960484Sobrien macro parameter expands into; if the parameter 136060484Sobrien expands into something which does not start with 136160484Sobrien an operand character, then we don't want to keep 136260484Sobrien the space. We don't have enough information to 136360484Sobrien make the right choice, so here we are making the 136460484Sobrien choice which is more likely to be correct. */ 1365218822Sdim if (to + 1 >= toend) 1366218822Sdim { 1367218822Sdim /* If we're near the end of the buffer, save the 1368218822Sdim character for the next time round. Otherwise 1369218822Sdim we'll lose our state. */ 1370218822Sdim UNGET (ch); 1371218822Sdim goto tofull; 1372218822Sdim } 1373218822Sdim *to++ = ' '; 137460484Sobrien } 137560484Sobrien 137633965Sjdp state = 3; 137733965Sjdp } 137833965Sjdp PUT (ch); 137933965Sjdp break; 138033965Sjdp } 138133965Sjdp } 138233965Sjdp 138333965Sjdp /*NOTREACHED*/ 138433965Sjdp 138533965Sjdp fromeof: 138633965Sjdp /* We have reached the end of the input. */ 138733965Sjdp return to - tostart; 138833965Sjdp 138933965Sjdp tofull: 139033965Sjdp /* The output buffer is full. Save any input we have not yet 139133965Sjdp processed. */ 139233965Sjdp if (fromend > from) 139333965Sjdp { 139460484Sobrien saved_input = from; 139533965Sjdp saved_input_len = fromend - from; 139633965Sjdp } 139733965Sjdp else 139860484Sobrien saved_input = NULL; 139960484Sobrien 140033965Sjdp return to - tostart; 140133965Sjdp} 140233965Sjdp 1403