/* app.c revision 78828 */
133965Sjdp/* This is the Assembler Pre-Processor 278828Sobrien Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 378828Sobrien 1999, 2000 433965Sjdp Free Software Foundation, Inc. 533965Sjdp 633965Sjdp This file is part of GAS, the GNU Assembler. 733965Sjdp 833965Sjdp GAS is free software; you can redistribute it and/or modify 933965Sjdp it under the terms of the GNU General Public License as published by 1033965Sjdp the Free Software Foundation; either version 2, or (at your option) 1133965Sjdp any later version. 1233965Sjdp 1333965Sjdp GAS is distributed in the hope that it will be useful, 1433965Sjdp but WITHOUT ANY WARRANTY; without even the implied warranty of 1533965Sjdp MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1633965Sjdp GNU General Public License for more details. 1733965Sjdp 1833965Sjdp You should have received a copy of the GNU General Public License 1933965Sjdp along with GAS; see the file COPYING. If not, write to the Free 2033965Sjdp Software Foundation, 59 Temple Place - Suite 330, Boston, MA 2133965Sjdp 02111-1307, USA. */ 2233965Sjdp 2333965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ 2433965Sjdp/* App, the assembler pre-processor. This pre-processor strips out excess 2533965Sjdp spaces, turns single-quoted characters into a decimal constant, and turns 2633965Sjdp # <number> <filename> <garbage> into a .line <number>\n.file <filename> 2733965Sjdp pair. This needs better error-handling. */ 2833965Sjdp 2933965Sjdp#include <stdio.h> 3033965Sjdp#include "as.h" /* For BAD_CASE() only */ 3133965Sjdp 3233965Sjdp#if (__STDC__ != 1) 3333965Sjdp#ifndef const 3433965Sjdp#define const /* empty */ 3533965Sjdp#endif 3633965Sjdp#endif 3733965Sjdp 3860484Sobrien#ifdef TC_M68K 3933965Sjdp/* Whether we are scrubbing in m68k MRI mode. This is different from 4033965Sjdp flag_m68k_mri, because the two flags will be affected by the .mri 4133965Sjdp pseudo-op at different times. 
*/ 4233965Sjdpstatic int scrub_m68k_mri; 4360484Sobrien#else 4460484Sobrien#define scrub_m68k_mri 0 4560484Sobrien#endif 4633965Sjdp 4733965Sjdp/* The pseudo-op which switches in and out of MRI mode. See the 4833965Sjdp comment in do_scrub_chars. */ 4933965Sjdpstatic const char mri_pseudo[] = ".mri 0"; 5033965Sjdp 5160484Sobrien#if defined TC_ARM && defined OBJ_ELF 5277298Sobrien/* The pseudo-op for which we need to special-case `@' characters. 5360484Sobrien See the comment in do_scrub_chars. */ 5460484Sobrienstatic const char symver_pseudo[] = ".symver"; 5560484Sobrienstatic const char * symver_state; 5660484Sobrien#endif 5760484Sobrien 5833965Sjdpstatic char lex[256]; 5933965Sjdpstatic const char symbol_chars[] = 6033965Sjdp"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; 6133965Sjdp 6233965Sjdp#define LEX_IS_SYMBOL_COMPONENT 1 6333965Sjdp#define LEX_IS_WHITESPACE 2 6433965Sjdp#define LEX_IS_LINE_SEPARATOR 3 6533965Sjdp#define LEX_IS_COMMENT_START 4 6633965Sjdp#define LEX_IS_LINE_COMMENT_START 5 6733965Sjdp#define LEX_IS_TWOCHAR_COMMENT_1ST 6 6833965Sjdp#define LEX_IS_STRINGQUOTE 8 6933965Sjdp#define LEX_IS_COLON 9 7033965Sjdp#define LEX_IS_NEWLINE 10 7133965Sjdp#define LEX_IS_ONECHAR_QUOTE 11 7238889Sjdp#ifdef TC_V850 7338889Sjdp#define LEX_IS_DOUBLEDASH_1ST 12 7438889Sjdp#endif 7538889Sjdp#ifdef TC_M32R 7660484Sobrien#define DOUBLEBAR_PARALLEL 7760484Sobrien#endif 7860484Sobrien#ifdef DOUBLEBAR_PARALLEL 7938889Sjdp#define LEX_IS_DOUBLEBAR_1ST 13 8038889Sjdp#endif 8133965Sjdp#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) 8233965Sjdp#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE) 8333965Sjdp#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR) 8433965Sjdp#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START) 8533965Sjdp#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START) 8633965Sjdp#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE) 8733965Sjdp 8833965Sjdpstatic int process_escape PARAMS ((int)); 
8933965Sjdp 9033965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be 9133965Sjdp built statically at compile time rather than dynamically 9277298Sobrien each and every time the assembler is run. xoxorich. */ 9333965Sjdp 9477298Sobrienvoid 9533965Sjdpdo_scrub_begin (m68k_mri) 9660484Sobrien int m68k_mri ATTRIBUTE_UNUSED; 9733965Sjdp{ 9833965Sjdp const char *p; 9960484Sobrien int c; 10033965Sjdp 10133965Sjdp lex[' '] = LEX_IS_WHITESPACE; 10233965Sjdp lex['\t'] = LEX_IS_WHITESPACE; 10338889Sjdp lex['\r'] = LEX_IS_WHITESPACE; 10433965Sjdp lex['\n'] = LEX_IS_NEWLINE; 10533965Sjdp lex[':'] = LEX_IS_COLON; 10633965Sjdp 10760484Sobrien#ifdef TC_M68K 10860484Sobrien scrub_m68k_mri = m68k_mri; 10960484Sobrien 11033965Sjdp if (! m68k_mri) 11160484Sobrien#endif 11233965Sjdp { 11333965Sjdp lex['"'] = LEX_IS_STRINGQUOTE; 11433965Sjdp 11560484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370) 11660484Sobrien /* I370 uses single-quotes to delimit integer, float constants */ 11733965Sjdp lex['\''] = LEX_IS_ONECHAR_QUOTE; 11833965Sjdp#endif 11933965Sjdp 12033965Sjdp#ifdef SINGLE_QUOTE_STRINGS 12133965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 12233965Sjdp#endif 12333965Sjdp } 12433965Sjdp 12533965Sjdp /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop 12633965Sjdp in state 5 of do_scrub_chars must be changed. */ 12733965Sjdp 12833965Sjdp /* Note that these override the previous defaults, e.g. if ';' is a 12933965Sjdp comment char, then it isn't a line separator. */ 13033965Sjdp for (p = symbol_chars; *p; ++p) 13133965Sjdp { 13233965Sjdp lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 13333965Sjdp } /* declare symbol characters */ 13433965Sjdp 13560484Sobrien for (c = 128; c < 256; ++c) 13660484Sobrien lex[c] = LEX_IS_SYMBOL_COMPONENT; 13760484Sobrien 13860484Sobrien#ifdef tc_symbol_chars 13960484Sobrien /* This macro permits the processor to specify all characters which 14060484Sobrien may appears in an operand. 
This will prevent the scrubber from 14160484Sobrien discarding meaningful whitespace in certain cases. The i386 14260484Sobrien backend uses this to support prefixes, which can confuse the 14360484Sobrien scrubber as to whether it is parsing operands or opcodes. */ 14460484Sobrien for (p = tc_symbol_chars; *p; ++p) 14560484Sobrien lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 14660484Sobrien#endif 14760484Sobrien 14833965Sjdp /* The m68k backend wants to be able to change comment_chars. */ 14933965Sjdp#ifndef tc_comment_chars 15033965Sjdp#define tc_comment_chars comment_chars 15133965Sjdp#endif 15233965Sjdp for (p = tc_comment_chars; *p; p++) 15333965Sjdp { 15433965Sjdp lex[(unsigned char) *p] = LEX_IS_COMMENT_START; 15533965Sjdp } /* declare comment chars */ 15633965Sjdp 15733965Sjdp for (p = line_comment_chars; *p; p++) 15833965Sjdp { 15933965Sjdp lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; 16033965Sjdp } /* declare line comment chars */ 16133965Sjdp 16233965Sjdp for (p = line_separator_chars; *p; p++) 16333965Sjdp { 16433965Sjdp lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; 16533965Sjdp } /* declare line separators */ 16633965Sjdp 16733965Sjdp /* Only allow slash-star comments if slash is not in use. 16833965Sjdp FIXME: This isn't right. We should always permit them. */ 16933965Sjdp if (lex['/'] == 0) 17033965Sjdp { 17133965Sjdp lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; 17233965Sjdp } 17333965Sjdp 17460484Sobrien#ifdef TC_M68K 17533965Sjdp if (m68k_mri) 17633965Sjdp { 17733965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 17833965Sjdp lex[';'] = LEX_IS_COMMENT_START; 17933965Sjdp lex['*'] = LEX_IS_LINE_COMMENT_START; 18033965Sjdp /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but 18133965Sjdp then it can't be used in an expression. 
*/ 18233965Sjdp lex['!'] = LEX_IS_LINE_COMMENT_START; 18333965Sjdp } 18460484Sobrien#endif 18538889Sjdp 18638889Sjdp#ifdef TC_V850 18738889Sjdp lex['-'] = LEX_IS_DOUBLEDASH_1ST; 18838889Sjdp#endif 18960484Sobrien#ifdef DOUBLEBAR_PARALLEL 19038889Sjdp lex['|'] = LEX_IS_DOUBLEBAR_1ST; 19138889Sjdp#endif 19260484Sobrien#ifdef TC_D30V 19360484Sobrien /* must do this is we want VLIW instruction with "->" or "<-" */ 19460484Sobrien lex['-'] = LEX_IS_SYMBOL_COMPONENT; 19560484Sobrien#endif 19633965Sjdp} /* do_scrub_begin() */ 19733965Sjdp 19833965Sjdp/* Saved state of the scrubber */ 19933965Sjdpstatic int state; 20033965Sjdpstatic int old_state; 20133965Sjdpstatic char *out_string; 20233965Sjdpstatic char out_buf[20]; 20333965Sjdpstatic int add_newlines; 20433965Sjdpstatic char *saved_input; 20533965Sjdpstatic int saved_input_len; 20660484Sobrienstatic char input_buffer[32 * 1024]; 20733965Sjdpstatic const char *mri_state; 20833965Sjdpstatic char mri_last_ch; 20933965Sjdp 21033965Sjdp/* Data structure for saving the state of app across #include's. Note that 21133965Sjdp app is called asynchronously to the parsing of the .include's, so our 21233965Sjdp state at the time .include is interpreted is completely unrelated. 21333965Sjdp That's why we have to save it all. 
*/ 21433965Sjdp 21577298Sobrienstruct app_save { 21677298Sobrien int state; 21777298Sobrien int old_state; 21877298Sobrien char * out_string; 21977298Sobrien char out_buf[sizeof (out_buf)]; 22077298Sobrien int add_newlines; 22177298Sobrien char * saved_input; 22277298Sobrien int saved_input_len; 22360484Sobrien#ifdef TC_M68K 22477298Sobrien int scrub_m68k_mri; 22560484Sobrien#endif 22677298Sobrien const char * mri_state; 22777298Sobrien char mri_last_ch; 22860484Sobrien#if defined TC_ARM && defined OBJ_ELF 22977298Sobrien const char * symver_state; 23060484Sobrien#endif 23177298Sobrien}; 23233965Sjdp 23333965Sjdpchar * 23433965Sjdpapp_push () 23533965Sjdp{ 23633965Sjdp register struct app_save *saved; 23733965Sjdp 23833965Sjdp saved = (struct app_save *) xmalloc (sizeof (*saved)); 23933965Sjdp saved->state = state; 24033965Sjdp saved->old_state = old_state; 24133965Sjdp saved->out_string = out_string; 24233965Sjdp memcpy (saved->out_buf, out_buf, sizeof (out_buf)); 24333965Sjdp saved->add_newlines = add_newlines; 24460484Sobrien if (saved_input == NULL) 24560484Sobrien saved->saved_input = NULL; 24660484Sobrien else 24760484Sobrien { 24860484Sobrien saved->saved_input = xmalloc (saved_input_len); 24960484Sobrien memcpy (saved->saved_input, saved_input, saved_input_len); 25060484Sobrien saved->saved_input_len = saved_input_len; 25160484Sobrien } 25260484Sobrien#ifdef TC_M68K 25333965Sjdp saved->scrub_m68k_mri = scrub_m68k_mri; 25460484Sobrien#endif 25533965Sjdp saved->mri_state = mri_state; 25633965Sjdp saved->mri_last_ch = mri_last_ch; 25760484Sobrien#if defined TC_ARM && defined OBJ_ELF 25860484Sobrien saved->symver_state = symver_state; 25960484Sobrien#endif 26033965Sjdp 26177298Sobrien /* do_scrub_begin() is not useful, just wastes time. 
*/ 26233965Sjdp 26333965Sjdp state = 0; 26433965Sjdp saved_input = NULL; 26533965Sjdp 26633965Sjdp return (char *) saved; 26733965Sjdp} 26833965Sjdp 26977298Sobrienvoid 27033965Sjdpapp_pop (arg) 27133965Sjdp char *arg; 27233965Sjdp{ 27333965Sjdp register struct app_save *saved = (struct app_save *) arg; 27433965Sjdp 27577298Sobrien /* There is no do_scrub_end (). */ 27633965Sjdp state = saved->state; 27733965Sjdp old_state = saved->old_state; 27833965Sjdp out_string = saved->out_string; 27933965Sjdp memcpy (out_buf, saved->out_buf, sizeof (out_buf)); 28033965Sjdp add_newlines = saved->add_newlines; 28160484Sobrien if (saved->saved_input == NULL) 28260484Sobrien saved_input = NULL; 28360484Sobrien else 28460484Sobrien { 28560484Sobrien assert (saved->saved_input_len <= (int) (sizeof input_buffer)); 28660484Sobrien memcpy (input_buffer, saved->saved_input, saved->saved_input_len); 28760484Sobrien saved_input = input_buffer; 28860484Sobrien saved_input_len = saved->saved_input_len; 28960484Sobrien free (saved->saved_input); 29060484Sobrien } 29160484Sobrien#ifdef TC_M68K 29233965Sjdp scrub_m68k_mri = saved->scrub_m68k_mri; 29360484Sobrien#endif 29433965Sjdp mri_state = saved->mri_state; 29533965Sjdp mri_last_ch = saved->mri_last_ch; 29660484Sobrien#if defined TC_ARM && defined OBJ_ELF 29760484Sobrien symver_state = saved->symver_state; 29860484Sobrien#endif 29933965Sjdp 30033965Sjdp free (arg); 30133965Sjdp} /* app_pop() */ 30233965Sjdp 30333965Sjdp/* @@ This assumes that \n &c are the same on host and target. This is not 30433965Sjdp necessarily true. 
*/ 30577298Sobrienstatic int 30633965Sjdpprocess_escape (ch) 30733965Sjdp int ch; 30833965Sjdp{ 30933965Sjdp switch (ch) 31033965Sjdp { 31133965Sjdp case 'b': 31233965Sjdp return '\b'; 31333965Sjdp case 'f': 31433965Sjdp return '\f'; 31533965Sjdp case 'n': 31633965Sjdp return '\n'; 31733965Sjdp case 'r': 31833965Sjdp return '\r'; 31933965Sjdp case 't': 32033965Sjdp return '\t'; 32133965Sjdp case '\'': 32233965Sjdp return '\''; 32333965Sjdp case '"': 32433965Sjdp return '\"'; 32533965Sjdp default: 32633965Sjdp return ch; 32733965Sjdp } 32833965Sjdp} 32933965Sjdp 33033965Sjdp/* This function is called to process input characters. The GET 33133965Sjdp parameter is used to retrieve more input characters. GET should 33233965Sjdp set its parameter to point to a buffer, and return the length of 33333965Sjdp the buffer; it should return 0 at end of file. The scrubbed output 33433965Sjdp characters are put into the buffer starting at TOSTART; the TOSTART 33533965Sjdp buffer is TOLEN bytes in length. The function returns the number 33633965Sjdp of scrubbed characters put into TOSTART. This will be TOLEN unless 33733965Sjdp end of file was seen. This function is arranged as a state 33833965Sjdp machine, and saves its state so that it may return at any point. 33933965Sjdp This is the way the old code used to work. 
*/ 34033965Sjdp 34133965Sjdpint 34233965Sjdpdo_scrub_chars (get, tostart, tolen) 34360484Sobrien int (*get) PARAMS ((char *, int)); 34433965Sjdp char *tostart; 34533965Sjdp int tolen; 34633965Sjdp{ 34733965Sjdp char *to = tostart; 34833965Sjdp char *toend = tostart + tolen; 34933965Sjdp char *from; 35033965Sjdp char *fromend; 35133965Sjdp int fromlen; 35233965Sjdp register int ch, ch2 = 0; 35333965Sjdp 35433965Sjdp /*State 0: beginning of normal line 35533965Sjdp 1: After first whitespace on line (flush more white) 35633965Sjdp 2: After first non-white (opcode) on line (keep 1white) 35733965Sjdp 3: after second white on line (into operands) (flush white) 35833965Sjdp 4: after putting out a .line, put out digits 35933965Sjdp 5: parsing a string, then go to old-state 36033965Sjdp 6: putting out \ escape in a "d string. 36133965Sjdp 7: After putting out a .appfile, put out string. 36233965Sjdp 8: After putting out a .appfile string, flush until newline. 36333965Sjdp 9: After seeing symbol char in state 3 (keep 1white after symchar) 36433965Sjdp 10: After seeing whitespace in state 9 (keep white before symchar) 36533965Sjdp 11: After seeing a symbol character in state 0 (eg a label definition) 36633965Sjdp -1: output string in out_string and go to the state in old_state 36733965Sjdp -2: flush text until a '*' '/' is seen, then go to state old_state 36838889Sjdp#ifdef TC_V850 36938889Sjdp 12: After seeing a dash, looking for a second dash as a start of comment. 37038889Sjdp#endif 37160484Sobrien#ifdef DOUBLEBAR_PARALLEL 37238889Sjdp 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator. 37338889Sjdp#endif 37433965Sjdp */ 37533965Sjdp 37633965Sjdp /* I added states 9 and 10 because the MIPS ECOFF assembler uses 37733965Sjdp constructs like ``.loc 1 20''. This was turning into ``.loc 37833965Sjdp 120''. States 9 and 10 ensure that a space is never dropped in 37933965Sjdp between characters which could appear in a identifier. 
Ian 38033965Sjdp Taylor, ian@cygnus.com. 38133965Sjdp 38233965Sjdp I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works 38333965Sjdp correctly on the PA (and any other target where colons are optional). 38438889Sjdp Jeff Law, law@cs.utah.edu. 38533965Sjdp 38638889Sjdp I added state 13 so that something like "cmp r1, r2 || trap #1" does not 38738889Sjdp get squashed into "cmp r1,r2||trap#1", with the all important space 38838889Sjdp between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */ 38938889Sjdp 39033965Sjdp /* This macro gets the next input character. */ 39133965Sjdp 39260484Sobrien#define GET() \ 39360484Sobrien (from < fromend \ 39460484Sobrien ? * (unsigned char *) (from++) \ 39560484Sobrien : (saved_input = NULL, \ 39660484Sobrien fromlen = (*get) (input_buffer, sizeof input_buffer), \ 39760484Sobrien from = input_buffer, \ 39860484Sobrien fromend = from + fromlen, \ 39960484Sobrien (fromlen == 0 \ 40060484Sobrien ? EOF \ 40160484Sobrien : * (unsigned char *) (from++)))) 40233965Sjdp 40333965Sjdp /* This macro pushes a character back on the input stream. */ 40433965Sjdp 40533965Sjdp#define UNGET(uch) (*--from = (uch)) 40633965Sjdp 40733965Sjdp /* This macro puts a character into the output buffer. If this 40833965Sjdp character fills the output buffer, this macro jumps to the label 40933965Sjdp TOFULL. We use this rather ugly approach because we need to 41033965Sjdp handle two different termination conditions: EOF on the input 41133965Sjdp stream, and a full output buffer. It would be simpler if we 41233965Sjdp always read in the entire input stream before processing it, but 41333965Sjdp I don't want to make such a significant change to the assembler's 41433965Sjdp memory usage. 
*/ 41533965Sjdp 41633965Sjdp#define PUT(pch) \ 41733965Sjdp do \ 41833965Sjdp { \ 41933965Sjdp *to++ = (pch); \ 42033965Sjdp if (to >= toend) \ 42133965Sjdp goto tofull; \ 42233965Sjdp } \ 42333965Sjdp while (0) 42433965Sjdp 42533965Sjdp if (saved_input != NULL) 42633965Sjdp { 42733965Sjdp from = saved_input; 42833965Sjdp fromend = from + saved_input_len; 42933965Sjdp } 43033965Sjdp else 43133965Sjdp { 43260484Sobrien fromlen = (*get) (input_buffer, sizeof input_buffer); 43333965Sjdp if (fromlen == 0) 43433965Sjdp return 0; 43560484Sobrien from = input_buffer; 43633965Sjdp fromend = from + fromlen; 43733965Sjdp } 43833965Sjdp 43933965Sjdp while (1) 44033965Sjdp { 44133965Sjdp /* The cases in this switch end with continue, in order to 44233965Sjdp branch back to the top of this while loop and generate the 44333965Sjdp next output character in the appropriate state. */ 44433965Sjdp switch (state) 44533965Sjdp { 44633965Sjdp case -1: 44733965Sjdp ch = *out_string++; 44833965Sjdp if (*out_string == '\0') 44933965Sjdp { 45033965Sjdp state = old_state; 45133965Sjdp old_state = 3; 45233965Sjdp } 45333965Sjdp PUT (ch); 45433965Sjdp continue; 45533965Sjdp 45633965Sjdp case -2: 45733965Sjdp for (;;) 45833965Sjdp { 45933965Sjdp do 46033965Sjdp { 46133965Sjdp ch = GET (); 46233965Sjdp 46333965Sjdp if (ch == EOF) 46433965Sjdp { 46560484Sobrien as_warn (_("end of file in comment")); 46633965Sjdp goto fromeof; 46733965Sjdp } 46833965Sjdp 46933965Sjdp if (ch == '\n') 47033965Sjdp PUT ('\n'); 47133965Sjdp } 47233965Sjdp while (ch != '*'); 47333965Sjdp 47433965Sjdp while ((ch = GET ()) == '*') 47533965Sjdp ; 47633965Sjdp 47733965Sjdp if (ch == EOF) 47833965Sjdp { 47960484Sobrien as_warn (_("end of file in comment")); 48033965Sjdp goto fromeof; 48133965Sjdp } 48233965Sjdp 48333965Sjdp if (ch == '/') 48433965Sjdp break; 48533965Sjdp 48633965Sjdp UNGET (ch); 48733965Sjdp } 48833965Sjdp 48933965Sjdp state = old_state; 49033965Sjdp UNGET (' '); 49133965Sjdp continue; 49233965Sjdp 
49333965Sjdp case 4: 49433965Sjdp ch = GET (); 49533965Sjdp if (ch == EOF) 49633965Sjdp goto fromeof; 49733965Sjdp else if (ch >= '0' && ch <= '9') 49833965Sjdp PUT (ch); 49933965Sjdp else 50033965Sjdp { 50133965Sjdp while (ch != EOF && IS_WHITESPACE (ch)) 50233965Sjdp ch = GET (); 50333965Sjdp if (ch == '"') 50433965Sjdp { 50533965Sjdp UNGET (ch); 50633965Sjdp if (scrub_m68k_mri) 50733965Sjdp out_string = "\n\tappfile "; 50833965Sjdp else 50933965Sjdp out_string = "\n\t.appfile "; 51033965Sjdp old_state = 7; 51133965Sjdp state = -1; 51233965Sjdp PUT (*out_string++); 51333965Sjdp } 51433965Sjdp else 51533965Sjdp { 51633965Sjdp while (ch != EOF && ch != '\n') 51733965Sjdp ch = GET (); 51833965Sjdp state = 0; 51933965Sjdp PUT (ch); 52033965Sjdp } 52133965Sjdp } 52233965Sjdp continue; 52333965Sjdp 52433965Sjdp case 5: 52533965Sjdp /* We are going to copy everything up to a quote character, 52633965Sjdp with special handling for a backslash. We try to 52733965Sjdp optimize the copying in the simple case without using the 52833965Sjdp GET and PUT macros. */ 52933965Sjdp { 53033965Sjdp char *s; 53133965Sjdp int len; 53233965Sjdp 53333965Sjdp for (s = from; s < fromend; s++) 53433965Sjdp { 53533965Sjdp ch = *s; 53633965Sjdp /* This condition must be changed if the type of any 53733965Sjdp other character can be LEX_IS_STRINGQUOTE. 
*/ 53833965Sjdp if (ch == '\\' 53933965Sjdp || ch == '"' 54033965Sjdp || ch == '\'' 54133965Sjdp || ch == '\n') 54233965Sjdp break; 54333965Sjdp } 54433965Sjdp len = s - from; 54533965Sjdp if (len > toend - to) 54633965Sjdp len = toend - to; 54733965Sjdp if (len > 0) 54833965Sjdp { 54933965Sjdp memcpy (to, from, len); 55033965Sjdp to += len; 55133965Sjdp from += len; 55233965Sjdp } 55333965Sjdp } 55433965Sjdp 55533965Sjdp ch = GET (); 55633965Sjdp if (ch == EOF) 55733965Sjdp { 55860484Sobrien as_warn (_("end of file in string: inserted '\"'")); 55933965Sjdp state = old_state; 56033965Sjdp UNGET ('\n'); 56133965Sjdp PUT ('"'); 56233965Sjdp } 56333965Sjdp else if (lex[ch] == LEX_IS_STRINGQUOTE) 56433965Sjdp { 56533965Sjdp state = old_state; 56633965Sjdp PUT (ch); 56733965Sjdp } 56833965Sjdp#ifndef NO_STRING_ESCAPES 56933965Sjdp else if (ch == '\\') 57033965Sjdp { 57133965Sjdp state = 6; 57233965Sjdp PUT (ch); 57333965Sjdp } 57433965Sjdp#endif 57533965Sjdp else if (scrub_m68k_mri && ch == '\n') 57633965Sjdp { 57733965Sjdp /* Just quietly terminate the string. This permits lines like 57833965Sjdp bne label loop if we haven't reach end yet 57933965Sjdp */ 58033965Sjdp state = old_state; 58133965Sjdp UNGET (ch); 58233965Sjdp PUT ('\''); 58333965Sjdp } 58433965Sjdp else 58533965Sjdp { 58633965Sjdp PUT (ch); 58733965Sjdp } 58833965Sjdp continue; 58933965Sjdp 59033965Sjdp case 6: 59133965Sjdp state = 5; 59233965Sjdp ch = GET (); 59333965Sjdp switch (ch) 59433965Sjdp { 59533965Sjdp /* Handle strings broken across lines, by turning '\n' into 59633965Sjdp '\\' and 'n'. 
*/ 59733965Sjdp case '\n': 59833965Sjdp UNGET ('n'); 59933965Sjdp add_newlines++; 60033965Sjdp PUT ('\\'); 60133965Sjdp continue; 60233965Sjdp 60333965Sjdp case '"': 60433965Sjdp case '\\': 60533965Sjdp case 'b': 60633965Sjdp case 'f': 60733965Sjdp case 'n': 60833965Sjdp case 'r': 60933965Sjdp case 't': 61033965Sjdp case 'v': 61133965Sjdp case 'x': 61233965Sjdp case 'X': 61333965Sjdp case '0': 61433965Sjdp case '1': 61533965Sjdp case '2': 61633965Sjdp case '3': 61733965Sjdp case '4': 61833965Sjdp case '5': 61933965Sjdp case '6': 62033965Sjdp case '7': 62133965Sjdp break; 62233965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) 62333965Sjdp default: 62460484Sobrien as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch); 62533965Sjdp break; 62633965Sjdp#else /* ONLY_STANDARD_ESCAPES */ 62733965Sjdp default: 62833965Sjdp /* Accept \x as x for any x */ 62933965Sjdp break; 63033965Sjdp#endif /* ONLY_STANDARD_ESCAPES */ 63133965Sjdp 63233965Sjdp case EOF: 63360484Sobrien as_warn (_("End of file in string: '\"' inserted")); 63433965Sjdp PUT ('"'); 63533965Sjdp continue; 63633965Sjdp } 63733965Sjdp PUT (ch); 63833965Sjdp continue; 63933965Sjdp 64033965Sjdp case 7: 64133965Sjdp ch = GET (); 64233965Sjdp state = 5; 64333965Sjdp old_state = 8; 64433965Sjdp if (ch == EOF) 64533965Sjdp goto fromeof; 64633965Sjdp PUT (ch); 64733965Sjdp continue; 64833965Sjdp 64933965Sjdp case 8: 65033965Sjdp do 65133965Sjdp ch = GET (); 65233965Sjdp while (ch != '\n' && ch != EOF); 65333965Sjdp if (ch == EOF) 65433965Sjdp goto fromeof; 65533965Sjdp state = 0; 65633965Sjdp PUT (ch); 65733965Sjdp continue; 65833965Sjdp } 65933965Sjdp 66033965Sjdp /* OK, we are somewhere in states 0 through 4 or 9 through 11 */ 66133965Sjdp 66233965Sjdp /* flushchar: */ 66333965Sjdp ch = GET (); 66433965Sjdp 66533965Sjdp recycle: 66633965Sjdp 66760484Sobrien#if defined TC_ARM && defined OBJ_ELF 66860484Sobrien /* We need to watch out for .symver directives. 
See the comment later 66960484Sobrien in this function. */ 67060484Sobrien if (symver_state == NULL) 67160484Sobrien { 67260484Sobrien if ((state == 0 || state == 1) && ch == symver_pseudo[0]) 67360484Sobrien symver_state = symver_pseudo + 1; 67460484Sobrien } 67560484Sobrien else 67660484Sobrien { 67760484Sobrien /* We advance to the next state if we find the right 67860484Sobrien character. */ 67960484Sobrien if (ch != '\0' && (*symver_state == ch)) 68060484Sobrien ++symver_state; 68160484Sobrien else if (*symver_state != '\0') 68260484Sobrien /* We did not get the expected character, or we didn't 68360484Sobrien get a valid terminating character after seeing the 68460484Sobrien entire pseudo-op, so we must go back to the beginning. */ 68560484Sobrien symver_state = NULL; 68660484Sobrien else 68760484Sobrien { 68860484Sobrien /* We've read the entire pseudo-op. If this is the end 68960484Sobrien of the line, go back to the beginning. */ 69060484Sobrien if (IS_NEWLINE (ch)) 69160484Sobrien symver_state = NULL; 69260484Sobrien } 69360484Sobrien } 69460484Sobrien#endif /* TC_ARM && OBJ_ELF */ 69560484Sobrien 69633965Sjdp#ifdef TC_M68K 69733965Sjdp /* We want to have pseudo-ops which control whether we are in 69833965Sjdp MRI mode or not. Unfortunately, since m68k MRI mode affects 69933965Sjdp the scrubber, that means that we need a special purpose 70033965Sjdp recognizer here. */ 70133965Sjdp if (mri_state == NULL) 70233965Sjdp { 70333965Sjdp if ((state == 0 || state == 1) 70433965Sjdp && ch == mri_pseudo[0]) 70533965Sjdp mri_state = mri_pseudo + 1; 70633965Sjdp } 70733965Sjdp else 70833965Sjdp { 70933965Sjdp /* We advance to the next state if we find the right 71033965Sjdp character, or if we need a space character and we get any 71133965Sjdp whitespace character, or if we need a '0' and we get a 71233965Sjdp '1' (this is so that we only need one state to handle 71333965Sjdp ``.mri 0'' and ``.mri 1''). 
*/ 71433965Sjdp if (ch != '\0' 71533965Sjdp && (*mri_state == ch 71633965Sjdp || (*mri_state == ' ' 71733965Sjdp && lex[ch] == LEX_IS_WHITESPACE) 71833965Sjdp || (*mri_state == '0' 71933965Sjdp && ch == '1'))) 72033965Sjdp { 72133965Sjdp mri_last_ch = ch; 72233965Sjdp ++mri_state; 72333965Sjdp } 72433965Sjdp else if (*mri_state != '\0' 72533965Sjdp || (lex[ch] != LEX_IS_WHITESPACE 72633965Sjdp && lex[ch] != LEX_IS_NEWLINE)) 72733965Sjdp { 72833965Sjdp /* We did not get the expected character, or we didn't 72933965Sjdp get a valid terminating character after seeing the 73033965Sjdp entire pseudo-op, so we must go back to the 73133965Sjdp beginning. */ 73233965Sjdp mri_state = NULL; 73333965Sjdp } 73433965Sjdp else 73533965Sjdp { 73633965Sjdp /* We've read the entire pseudo-op. mips_last_ch is 73733965Sjdp either '0' or '1' indicating whether to enter or 73833965Sjdp leave MRI mode. */ 73933965Sjdp do_scrub_begin (mri_last_ch == '1'); 74038889Sjdp mri_state = NULL; 74133965Sjdp 74233965Sjdp /* We continue handling the character as usual. The 74333965Sjdp main gas reader must also handle the .mri pseudo-op 74433965Sjdp to control expression parsing and the like. */ 74533965Sjdp } 74633965Sjdp } 74733965Sjdp#endif 74833965Sjdp 74933965Sjdp if (ch == EOF) 75033965Sjdp { 75133965Sjdp if (state != 0) 75233965Sjdp { 75360484Sobrien as_warn (_("end of file not at end of a line; newline inserted")); 75433965Sjdp state = 0; 75533965Sjdp PUT ('\n'); 75633965Sjdp } 75733965Sjdp goto fromeof; 75833965Sjdp } 75933965Sjdp 76033965Sjdp switch (lex[ch]) 76133965Sjdp { 76233965Sjdp case LEX_IS_WHITESPACE: 76333965Sjdp do 76433965Sjdp { 76533965Sjdp ch = GET (); 76633965Sjdp } 76733965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 76833965Sjdp if (ch == EOF) 76933965Sjdp goto fromeof; 77033965Sjdp 77133965Sjdp if (state == 0) 77233965Sjdp { 77333965Sjdp /* Preserve a single whitespace character at the 77433965Sjdp beginning of a line. 
*/ 77533965Sjdp state = 1; 77633965Sjdp UNGET (ch); 77733965Sjdp PUT (' '); 77833965Sjdp break; 77933965Sjdp } 78033965Sjdp 78160484Sobrien#ifdef KEEP_WHITE_AROUND_COLON 78277298Sobrien if (lex[ch] == LEX_IS_COLON) 78377298Sobrien { 78477298Sobrien /* Only keep this white if there's no white *after* the 78577298Sobrien colon. */ 78677298Sobrien ch2 = GET (); 78777298Sobrien UNGET (ch2); 78877298Sobrien if (!IS_WHITESPACE (ch2)) 78977298Sobrien { 79077298Sobrien state = 9; 79177298Sobrien UNGET (ch); 79277298Sobrien PUT (' '); 79377298Sobrien break; 79477298Sobrien } 79577298Sobrien } 79660484Sobrien#endif 79733965Sjdp if (IS_COMMENT (ch) 79833965Sjdp || ch == '/' 79933965Sjdp || IS_LINE_SEPARATOR (ch)) 80033965Sjdp { 80133965Sjdp if (scrub_m68k_mri) 80233965Sjdp { 80333965Sjdp /* In MRI mode, we keep these spaces. */ 80433965Sjdp UNGET (ch); 80533965Sjdp PUT (' '); 80633965Sjdp break; 80733965Sjdp } 80833965Sjdp goto recycle; 80933965Sjdp } 81033965Sjdp 81133965Sjdp /* If we're in state 2 or 11, we've seen a non-white 81233965Sjdp character followed by whitespace. If the next character 81333965Sjdp is ':', this is whitespace after a label name which we 81433965Sjdp normally must ignore. In MRI mode, though, spaces are 81533965Sjdp not permitted between the label and the colon. */ 81633965Sjdp if ((state == 2 || state == 11) 81733965Sjdp && lex[ch] == LEX_IS_COLON 81833965Sjdp && ! scrub_m68k_mri) 81933965Sjdp { 82033965Sjdp state = 1; 82133965Sjdp PUT (ch); 82233965Sjdp break; 82333965Sjdp } 82433965Sjdp 82533965Sjdp switch (state) 82633965Sjdp { 82733965Sjdp case 0: 82833965Sjdp state++; 82933965Sjdp goto recycle; /* Punted leading sp */ 83033965Sjdp case 1: 83133965Sjdp /* We can arrive here if we leave a leading whitespace 83233965Sjdp character at the beginning of a line. */ 83333965Sjdp goto recycle; 83433965Sjdp case 2: 83533965Sjdp state = 3; 83633965Sjdp if (to + 1 < toend) 83733965Sjdp { 83833965Sjdp /* Optimize common case by skipping UNGET/GET. 
*/ 83933965Sjdp PUT (' '); /* Sp after opco */ 84033965Sjdp goto recycle; 84133965Sjdp } 84233965Sjdp UNGET (ch); 84333965Sjdp PUT (' '); 84433965Sjdp break; 84533965Sjdp case 3: 84633965Sjdp if (scrub_m68k_mri) 84733965Sjdp { 84833965Sjdp /* In MRI mode, we keep these spaces. */ 84933965Sjdp UNGET (ch); 85033965Sjdp PUT (' '); 85133965Sjdp break; 85233965Sjdp } 85333965Sjdp goto recycle; /* Sp in operands */ 85433965Sjdp case 9: 85533965Sjdp case 10: 85633965Sjdp if (scrub_m68k_mri) 85733965Sjdp { 85833965Sjdp /* In MRI mode, we keep these spaces. */ 85933965Sjdp state = 3; 86033965Sjdp UNGET (ch); 86133965Sjdp PUT (' '); 86233965Sjdp break; 86333965Sjdp } 86433965Sjdp state = 10; /* Sp after symbol char */ 86533965Sjdp goto recycle; 86633965Sjdp case 11: 86760484Sobrien if (LABELS_WITHOUT_COLONS || flag_m68k_mri) 86833965Sjdp state = 1; 86933965Sjdp else 87033965Sjdp { 87133965Sjdp /* We know that ch is not ':', since we tested that 87233965Sjdp case above. Therefore this is not a label, so it 87333965Sjdp must be the opcode, and we've just seen the 87433965Sjdp whitespace after it. */ 87533965Sjdp state = 3; 87633965Sjdp } 87733965Sjdp UNGET (ch); 87833965Sjdp PUT (' '); /* Sp after label definition. */ 87933965Sjdp break; 88033965Sjdp default: 88133965Sjdp BAD_CASE (state); 88233965Sjdp } 88333965Sjdp break; 88433965Sjdp 88533965Sjdp case LEX_IS_TWOCHAR_COMMENT_1ST: 88633965Sjdp ch2 = GET (); 88733965Sjdp if (ch2 == '*') 88833965Sjdp { 88933965Sjdp for (;;) 89033965Sjdp { 89133965Sjdp do 89233965Sjdp { 89333965Sjdp ch2 = GET (); 89433965Sjdp if (ch2 != EOF && IS_NEWLINE (ch2)) 89533965Sjdp add_newlines++; 89633965Sjdp } 89733965Sjdp while (ch2 != EOF && ch2 != '*'); 89833965Sjdp 89933965Sjdp while (ch2 == '*') 90033965Sjdp ch2 = GET (); 90133965Sjdp 90233965Sjdp if (ch2 == EOF || ch2 == '/') 90333965Sjdp break; 90433965Sjdp 90533965Sjdp /* This UNGET will ensure that we count newlines 90633965Sjdp correctly. 
*/ 90733965Sjdp UNGET (ch2); 90833965Sjdp } 90933965Sjdp 91033965Sjdp if (ch2 == EOF) 91160484Sobrien as_warn (_("end of file in multiline comment")); 91233965Sjdp 91333965Sjdp ch = ' '; 91433965Sjdp goto recycle; 91533965Sjdp } 91677298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS 91777298Sobrien else if (ch2 == '/') 91877298Sobrien { 91977298Sobrien do 92077298Sobrien { 92177298Sobrien ch = GET (); 92277298Sobrien } 92377298Sobrien while (ch != EOF && !IS_NEWLINE (ch)); 92477298Sobrien if (ch == EOF) 92577298Sobrien as_warn ("end of file in comment; newline inserted"); 92677298Sobrien state = 0; 92777298Sobrien PUT ('\n'); 92877298Sobrien break; 92977298Sobrien } 93077298Sobrien#endif 93133965Sjdp else 93233965Sjdp { 93333965Sjdp if (ch2 != EOF) 93433965Sjdp UNGET (ch2); 93533965Sjdp if (state == 9 || state == 10) 93633965Sjdp state = 3; 93733965Sjdp PUT (ch); 93833965Sjdp } 93933965Sjdp break; 94033965Sjdp 94133965Sjdp case LEX_IS_STRINGQUOTE: 94233965Sjdp if (state == 10) 94333965Sjdp { 94433965Sjdp /* Preserve the whitespace in foo "bar" */ 94533965Sjdp UNGET (ch); 94633965Sjdp state = 3; 94733965Sjdp PUT (' '); 94833965Sjdp 94933965Sjdp /* PUT didn't jump out. We could just break, but we 95033965Sjdp know what will happen, so optimize a bit. 
*/ 95133965Sjdp ch = GET (); 95233965Sjdp old_state = 3; 95333965Sjdp } 95433965Sjdp else if (state == 9) 95533965Sjdp old_state = 3; 95633965Sjdp else 95733965Sjdp old_state = state; 95833965Sjdp state = 5; 95933965Sjdp PUT (ch); 96033965Sjdp break; 96133965Sjdp 96233965Sjdp#ifndef IEEE_STYLE 96333965Sjdp case LEX_IS_ONECHAR_QUOTE: 96433965Sjdp if (state == 10) 96533965Sjdp { 96633965Sjdp /* Preserve the whitespace in foo 'b' */ 96733965Sjdp UNGET (ch); 96833965Sjdp state = 3; 96933965Sjdp PUT (' '); 97033965Sjdp break; 97133965Sjdp } 97233965Sjdp ch = GET (); 97333965Sjdp if (ch == EOF) 97433965Sjdp { 97560484Sobrien as_warn (_("end of file after a one-character quote; \\0 inserted")); 97633965Sjdp ch = 0; 97733965Sjdp } 97833965Sjdp if (ch == '\\') 97933965Sjdp { 98033965Sjdp ch = GET (); 98133965Sjdp if (ch == EOF) 98233965Sjdp { 98360484Sobrien as_warn (_("end of file in escape character")); 98433965Sjdp ch = '\\'; 98533965Sjdp } 98633965Sjdp else 98733965Sjdp ch = process_escape (ch); 98833965Sjdp } 98933965Sjdp sprintf (out_buf, "%d", (int) (unsigned char) ch); 99033965Sjdp 99133965Sjdp /* None of these 'x constants for us. We want 'x'. 
*/ 99233965Sjdp if ((ch = GET ()) != '\'') 99333965Sjdp { 99433965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE 99560484Sobrien as_warn (_("Missing close quote: (assumed)")); 99633965Sjdp#else 99733965Sjdp if (ch != EOF) 99833965Sjdp UNGET (ch); 99933965Sjdp#endif 100033965Sjdp } 100133965Sjdp if (strlen (out_buf) == 1) 100233965Sjdp { 100333965Sjdp PUT (out_buf[0]); 100433965Sjdp break; 100533965Sjdp } 100633965Sjdp if (state == 9) 100733965Sjdp old_state = 3; 100833965Sjdp else 100933965Sjdp old_state = state; 101033965Sjdp state = -1; 101133965Sjdp out_string = out_buf; 101233965Sjdp PUT (*out_string++); 101333965Sjdp break; 101433965Sjdp#endif 101533965Sjdp 101633965Sjdp case LEX_IS_COLON: 101760484Sobrien#ifdef KEEP_WHITE_AROUND_COLON 101877298Sobrien state = 9; 101960484Sobrien#else 102033965Sjdp if (state == 9 || state == 10) 102133965Sjdp state = 3; 102233965Sjdp else if (state != 3) 102333965Sjdp state = 1; 102460484Sobrien#endif 102533965Sjdp PUT (ch); 102633965Sjdp break; 102733965Sjdp 102833965Sjdp case LEX_IS_NEWLINE: 102933965Sjdp /* Roll out a bunch of newlines from inside comments, etc. */ 103033965Sjdp if (add_newlines) 103133965Sjdp { 103233965Sjdp --add_newlines; 103333965Sjdp UNGET (ch); 103433965Sjdp } 103577298Sobrien /* Fall through. */ 103633965Sjdp 103733965Sjdp case LEX_IS_LINE_SEPARATOR: 103833965Sjdp state = 0; 103933965Sjdp PUT (ch); 104033965Sjdp break; 104133965Sjdp 104238889Sjdp#ifdef TC_V850 104338889Sjdp case LEX_IS_DOUBLEDASH_1ST: 104477298Sobrien ch2 = GET (); 104538889Sjdp if (ch2 != '-') 104638889Sjdp { 104738889Sjdp UNGET (ch2); 104838889Sjdp goto de_fault; 104938889Sjdp } 105077298Sobrien /* Read and skip to end of line. 
*/ 105138889Sjdp do 105238889Sjdp { 105338889Sjdp ch = GET (); 105438889Sjdp } 105538889Sjdp while (ch != EOF && ch != '\n'); 105638889Sjdp if (ch == EOF) 105738889Sjdp { 105860484Sobrien as_warn (_("end of file in comment; newline inserted")); 105938889Sjdp } 106038889Sjdp state = 0; 106138889Sjdp PUT ('\n'); 106238889Sjdp break; 106377298Sobrien#endif 106460484Sobrien#ifdef DOUBLEBAR_PARALLEL 106538889Sjdp case LEX_IS_DOUBLEBAR_1ST: 106677298Sobrien ch2 = GET (); 106738889Sjdp if (ch2 != '|') 106838889Sjdp { 106938889Sjdp UNGET (ch2); 107038889Sjdp goto de_fault; 107138889Sjdp } 107238889Sjdp /* Reset back to state 1 and pretend that we are parsing a line from 107338889Sjdp just after the first white space. */ 107438889Sjdp state = 1; 107538889Sjdp PUT ('|'); 107638889Sjdp PUT ('|'); 107738889Sjdp break; 107877298Sobrien#endif 107933965Sjdp case LEX_IS_LINE_COMMENT_START: 108033965Sjdp /* FIXME-someday: The two character comment stuff was badly 108133965Sjdp thought out. On i386, we want '/' as line comment start 108233965Sjdp AND we want C style comments. hence this hack. The 108333965Sjdp whole lexical process should be reworked. xoxorich. */ 108433965Sjdp if (ch == '/') 108533965Sjdp { 108633965Sjdp ch2 = GET (); 108733965Sjdp if (ch2 == '*') 108833965Sjdp { 108933965Sjdp old_state = 3; 109033965Sjdp state = -2; 109133965Sjdp break; 109233965Sjdp } 109333965Sjdp else 109433965Sjdp { 109533965Sjdp UNGET (ch2); 109633965Sjdp } 109733965Sjdp } /* bad hack */ 109833965Sjdp 109933965Sjdp if (state == 0 || state == 1) /* Only comment at start of line. 
*/ 110033965Sjdp { 110133965Sjdp int startch; 110233965Sjdp 110333965Sjdp startch = ch; 110433965Sjdp 110533965Sjdp do 110633965Sjdp { 110733965Sjdp ch = GET (); 110833965Sjdp } 110933965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 111033965Sjdp if (ch == EOF) 111133965Sjdp { 111260484Sobrien as_warn (_("end of file in comment; newline inserted")); 111333965Sjdp PUT ('\n'); 111433965Sjdp break; 111533965Sjdp } 111633965Sjdp if (ch < '0' || ch > '9' || state != 0 || startch != '#') 111733965Sjdp { 111833965Sjdp /* Not a cpp line. */ 111933965Sjdp while (ch != EOF && !IS_NEWLINE (ch)) 112033965Sjdp ch = GET (); 112133965Sjdp if (ch == EOF) 112260484Sobrien as_warn (_("EOF in Comment: Newline inserted")); 112333965Sjdp state = 0; 112433965Sjdp PUT ('\n'); 112533965Sjdp break; 112633965Sjdp } 112777298Sobrien /* Looks like `# 123 "filename"' from cpp. */ 112833965Sjdp UNGET (ch); 112933965Sjdp old_state = 4; 113033965Sjdp state = -1; 113133965Sjdp if (scrub_m68k_mri) 113233965Sjdp out_string = "\tappline "; 113333965Sjdp else 113433965Sjdp out_string = "\t.appline "; 113533965Sjdp PUT (*out_string++); 113633965Sjdp break; 113733965Sjdp } 113833965Sjdp 113938889Sjdp#ifdef TC_D10V 114038889Sjdp /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true. 114138889Sjdp Trap is the only short insn that has a first operand that is 114238889Sjdp neither register nor label. 114338889Sjdp We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 . 114477298Sobrien We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is 114577298Sobrien already LEX_IS_LINE_COMMENT_START. However, it is the 114677298Sobrien only character in line_comment_chars for d10v, hence we 114777298Sobrien can recognize it as such. */ 114838889Sjdp /* An alternative approach would be to reset the state to 1 when 114938889Sjdp we see '||', '<'- or '->', but that seems to be overkill. 
*/ 115077298Sobrien if (state == 10) 115177298Sobrien PUT (' '); 115238889Sjdp#endif 115333965Sjdp /* We have a line comment character which is not at the 115433965Sjdp start of a line. If this is also a normal comment 115533965Sjdp character, fall through. Otherwise treat it as a default 115633965Sjdp character. */ 115733965Sjdp if (strchr (tc_comment_chars, ch) == NULL 115833965Sjdp && (! scrub_m68k_mri 115933965Sjdp || (ch != '!' && ch != '*'))) 116033965Sjdp goto de_fault; 116133965Sjdp if (scrub_m68k_mri 116233965Sjdp && (ch == '!' || ch == '*' || ch == '#') 116333965Sjdp && state != 1 116433965Sjdp && state != 10) 116533965Sjdp goto de_fault; 116633965Sjdp /* Fall through. */ 116733965Sjdp case LEX_IS_COMMENT_START: 116860484Sobrien#if defined TC_ARM && defined OBJ_ELF 116960484Sobrien /* On the ARM, `@' is the comment character. 117060484Sobrien Unfortunately this is also a special character in ELF .symver 117177298Sobrien directives (and .type, though we deal with those another way). 117277298Sobrien So we check if this line is such a directive, and treat 117377298Sobrien the character as default if so. This is a hack. */ 117460484Sobrien if ((symver_state != NULL) && (*symver_state == 0)) 117560484Sobrien goto de_fault; 117660484Sobrien#endif 117777298Sobrien#ifdef WARN_COMMENTS 117877298Sobrien if (!found_comment) 117977298Sobrien as_where (&found_comment_file, &found_comment); 118077298Sobrien#endif 118133965Sjdp do 118233965Sjdp { 118333965Sjdp ch = GET (); 118433965Sjdp } 118533965Sjdp while (ch != EOF && !IS_NEWLINE (ch)); 118633965Sjdp if (ch == EOF) 118760484Sobrien as_warn (_("end of file in comment; newline inserted")); 118833965Sjdp state = 0; 118933965Sjdp PUT ('\n'); 119033965Sjdp break; 119133965Sjdp 119233965Sjdp case LEX_IS_SYMBOL_COMPONENT: 119333965Sjdp if (state == 10) 119433965Sjdp { 119533965Sjdp /* This is a symbol character following another symbol 119633965Sjdp character, with whitespace in between. 
We skipped 119733965Sjdp the whitespace earlier, so output it now. */ 119833965Sjdp UNGET (ch); 119933965Sjdp state = 3; 120033965Sjdp PUT (' '); 120133965Sjdp break; 120233965Sjdp } 120333965Sjdp 120433965Sjdp if (state == 3) 120533965Sjdp state = 9; 120633965Sjdp 120733965Sjdp /* This is a common case. Quickly copy CH and all the 120833965Sjdp following symbol component or normal characters. */ 120960484Sobrien if (to + 1 < toend 121060484Sobrien && mri_state == NULL 121160484Sobrien#if defined TC_ARM && defined OBJ_ELF 121260484Sobrien && symver_state == NULL 121360484Sobrien#endif 121460484Sobrien ) 121533965Sjdp { 121633965Sjdp char *s; 121733965Sjdp int len; 121833965Sjdp 121933965Sjdp for (s = from; s < fromend; s++) 122033965Sjdp { 122133965Sjdp int type; 122233965Sjdp 122377298Sobrien ch2 = *(unsigned char *) s; 122433965Sjdp type = lex[ch2]; 122533965Sjdp if (type != 0 122633965Sjdp && type != LEX_IS_SYMBOL_COMPONENT) 122733965Sjdp break; 122833965Sjdp } 122933965Sjdp if (s > from) 123033965Sjdp { 123133965Sjdp /* Handle the last character normally, for 123233965Sjdp simplicity. */ 123333965Sjdp --s; 123433965Sjdp } 123533965Sjdp len = s - from; 123633965Sjdp if (len > (toend - to) - 1) 123733965Sjdp len = (toend - to) - 1; 123833965Sjdp if (len > 0) 123933965Sjdp { 124033965Sjdp PUT (ch); 124133965Sjdp if (len > 8) 124233965Sjdp { 124333965Sjdp memcpy (to, from, len); 124433965Sjdp to += len; 124533965Sjdp from += len; 124633965Sjdp } 124733965Sjdp else 124833965Sjdp { 124933965Sjdp switch (len) 125033965Sjdp { 125133965Sjdp case 8: *to++ = *from++; 125233965Sjdp case 7: *to++ = *from++; 125333965Sjdp case 6: *to++ = *from++; 125433965Sjdp case 5: *to++ = *from++; 125533965Sjdp case 4: *to++ = *from++; 125633965Sjdp case 3: *to++ = *from++; 125733965Sjdp case 2: *to++ = *from++; 125833965Sjdp case 1: *to++ = *from++; 125933965Sjdp } 126077298Sobrien } 126133965Sjdp ch = GET (); 126233965Sjdp } 126333965Sjdp } 126433965Sjdp 126533965Sjdp /* Fall through. 
*/ 126633965Sjdp default: 126733965Sjdp de_fault: 126833965Sjdp /* Some relatively `normal' character. */ 126933965Sjdp if (state == 0) 127033965Sjdp { 127133965Sjdp state = 11; /* Now seeing label definition */ 127233965Sjdp } 127333965Sjdp else if (state == 1) 127433965Sjdp { 127533965Sjdp state = 2; /* Ditto */ 127633965Sjdp } 127733965Sjdp else if (state == 9) 127833965Sjdp { 127933965Sjdp if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) 128033965Sjdp state = 3; 128133965Sjdp } 128233965Sjdp else if (state == 10) 128333965Sjdp { 128460484Sobrien if (ch == '\\') 128560484Sobrien { 128660484Sobrien /* Special handling for backslash: a backslash may 128760484Sobrien be the beginning of a formal parameter (of a 128860484Sobrien macro) following another symbol character, with 128960484Sobrien whitespace in between. If that is the case, we 129060484Sobrien output a space before the parameter. Strictly 129160484Sobrien speaking, correct handling depends upon what the 129260484Sobrien macro parameter expands into; if the parameter 129360484Sobrien expands into something which does not start with 129460484Sobrien an operand character, then we don't want to keep 129560484Sobrien the space. We don't have enough information to 129660484Sobrien make the right choice, so here we are making the 129760484Sobrien choice which is more likely to be correct. */ 129860484Sobrien PUT (' '); 129960484Sobrien } 130060484Sobrien 130133965Sjdp state = 3; 130233965Sjdp } 130333965Sjdp PUT (ch); 130433965Sjdp break; 130533965Sjdp } 130633965Sjdp } 130733965Sjdp 130833965Sjdp /*NOTREACHED*/ 130933965Sjdp 131033965Sjdp fromeof: 131133965Sjdp /* We have reached the end of the input. */ 131233965Sjdp return to - tostart; 131333965Sjdp 131433965Sjdp tofull: 131533965Sjdp /* The output buffer is full. Save any input we have not yet 131633965Sjdp processed. 
*/ 131733965Sjdp if (fromend > from) 131833965Sjdp { 131960484Sobrien saved_input = from; 132033965Sjdp saved_input_len = fromend - from; 132133965Sjdp } 132233965Sjdp else 132360484Sobrien saved_input = NULL; 132460484Sobrien 132533965Sjdp return to - tostart; 132633965Sjdp} 132733965Sjdp 132833965Sjdp/* end of app.c */ 1329