app.c revision 77298
133965Sjdp/* This is the Assembler Pre-Processor 260484Sobrien Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 2000 333965Sjdp Free Software Foundation, Inc. 433965Sjdp 533965Sjdp This file is part of GAS, the GNU Assembler. 633965Sjdp 733965Sjdp GAS is free software; you can redistribute it and/or modify 833965Sjdp it under the terms of the GNU General Public License as published by 933965Sjdp the Free Software Foundation; either version 2, or (at your option) 1033965Sjdp any later version. 1133965Sjdp 1233965Sjdp GAS is distributed in the hope that it will be useful, 1333965Sjdp but WITHOUT ANY WARRANTY; without even the implied warranty of 1433965Sjdp MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1533965Sjdp GNU General Public License for more details. 1633965Sjdp 1733965Sjdp You should have received a copy of the GNU General Public License 1833965Sjdp along with GAS; see the file COPYING. If not, write to the Free 1933965Sjdp Software Foundation, 59 Temple Place - Suite 330, Boston, MA 2033965Sjdp 02111-1307, USA. */ 2133965Sjdp 2233965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ 2333965Sjdp/* App, the assembler pre-processor. This pre-processor strips out excess 2433965Sjdp spaces, turns single-quoted characters into a decimal constant, and turns 2533965Sjdp # <number> <filename> <garbage> into a .line <number>\n.file <filename> 2633965Sjdp pair. This needs better error-handling. */ 2733965Sjdp 2833965Sjdp#include <stdio.h> 2933965Sjdp#include "as.h" /* For BAD_CASE() only */ 3033965Sjdp 3133965Sjdp#if (__STDC__ != 1) 3233965Sjdp#ifndef const 3333965Sjdp#define const /* empty */ 3433965Sjdp#endif 3533965Sjdp#endif 3633965Sjdp 3760484Sobrien#ifdef TC_M68K 3833965Sjdp/* Whether we are scrubbing in m68k MRI mode. This is different from 3933965Sjdp flag_m68k_mri, because the two flags will be affected by the .mri 4033965Sjdp pseudo-op at different times. */ 4133965Sjdpstatic int scrub_m68k_mri; 4260484Sobrien#else 4360484Sobrien#define scrub_m68k_mri 0 4460484Sobrien#endif 4533965Sjdp 4633965Sjdp/* The pseudo-op which switches in and out of MRI mode. See the 4733965Sjdp comment in do_scrub_chars. */ 4833965Sjdpstatic const char mri_pseudo[] = ".mri 0"; 4933965Sjdp 5060484Sobrien#if defined TC_ARM && defined OBJ_ELF 5177298Sobrien/* The pseudo-op for which we need to special-case `@' characters. 5260484Sobrien See the comment in do_scrub_chars. */ 5360484Sobrienstatic const char symver_pseudo[] = ".symver"; 5460484Sobrienstatic const char * symver_state; 5560484Sobrien#endif 5660484Sobrien 5733965Sjdpstatic char lex[256]; 5833965Sjdpstatic const char symbol_chars[] = 5933965Sjdp"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; 6033965Sjdp 6133965Sjdp#define LEX_IS_SYMBOL_COMPONENT 1 6233965Sjdp#define LEX_IS_WHITESPACE 2 6333965Sjdp#define LEX_IS_LINE_SEPARATOR 3 6433965Sjdp#define LEX_IS_COMMENT_START 4 6533965Sjdp#define LEX_IS_LINE_COMMENT_START 5 6633965Sjdp#define LEX_IS_TWOCHAR_COMMENT_1ST 6 6733965Sjdp#define LEX_IS_STRINGQUOTE 8 6833965Sjdp#define LEX_IS_COLON 9 6933965Sjdp#define LEX_IS_NEWLINE 10 7033965Sjdp#define LEX_IS_ONECHAR_QUOTE 11 7138889Sjdp#ifdef TC_V850 7238889Sjdp#define LEX_IS_DOUBLEDASH_1ST 12 7338889Sjdp#endif 7438889Sjdp#ifdef TC_M32R 7560484Sobrien#define DOUBLEBAR_PARALLEL 7660484Sobrien#endif 7760484Sobrien#ifdef DOUBLEBAR_PARALLEL 7838889Sjdp#define LEX_IS_DOUBLEBAR_1ST 13 7938889Sjdp#endif 8033965Sjdp#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) 8133965Sjdp#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE) 8233965Sjdp#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR) 8333965Sjdp#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START) 8433965Sjdp#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START) 8533965Sjdp#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE) 8633965Sjdp 8733965Sjdpstatic int process_escape PARAMS ((int)); 8833965Sjdp 8933965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be 9033965Sjdp built statically at compile time rather than dynamically 9177298Sobrien each and every time the assembler is run. xoxorich. */ 9233965Sjdp 9377298Sobrienvoid 9433965Sjdpdo_scrub_begin (m68k_mri) 9560484Sobrien int m68k_mri ATTRIBUTE_UNUSED; 9633965Sjdp{ 9733965Sjdp const char *p; 9860484Sobrien int c; 9933965Sjdp 10033965Sjdp lex[' '] = LEX_IS_WHITESPACE; 10133965Sjdp lex['\t'] = LEX_IS_WHITESPACE; 10238889Sjdp lex['\r'] = LEX_IS_WHITESPACE; 10333965Sjdp lex['\n'] = LEX_IS_NEWLINE; 10433965Sjdp lex[':'] = LEX_IS_COLON; 10533965Sjdp 10660484Sobrien#ifdef TC_M68K 10760484Sobrien scrub_m68k_mri = m68k_mri; 10860484Sobrien 10933965Sjdp if (! m68k_mri) 11060484Sobrien#endif 11133965Sjdp { 11233965Sjdp lex['"'] = LEX_IS_STRINGQUOTE; 11333965Sjdp 11460484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370) 11560484Sobrien /* I370 uses single-quotes to delimit integer, float constants */ 11633965Sjdp lex['\''] = LEX_IS_ONECHAR_QUOTE; 11733965Sjdp#endif 11833965Sjdp 11933965Sjdp#ifdef SINGLE_QUOTE_STRINGS 12033965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 12133965Sjdp#endif 12233965Sjdp } 12333965Sjdp 12433965Sjdp /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop 12533965Sjdp in state 5 of do_scrub_chars must be changed. */ 12633965Sjdp 12733965Sjdp /* Note that these override the previous defaults, e.g. if ';' is a 12833965Sjdp comment char, then it isn't a line separator. */ 12933965Sjdp for (p = symbol_chars; *p; ++p) 13033965Sjdp { 13133965Sjdp lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 13233965Sjdp } /* declare symbol characters */ 13333965Sjdp 13460484Sobrien for (c = 128; c < 256; ++c) 13560484Sobrien lex[c] = LEX_IS_SYMBOL_COMPONENT; 13660484Sobrien 13760484Sobrien#ifdef tc_symbol_chars 13860484Sobrien /* This macro permits the processor to specify all characters which 13960484Sobrien may appears in an operand. This will prevent the scrubber from 14060484Sobrien discarding meaningful whitespace in certain cases. The i386 14160484Sobrien backend uses this to support prefixes, which can confuse the 14260484Sobrien scrubber as to whether it is parsing operands or opcodes. */ 14360484Sobrien for (p = tc_symbol_chars; *p; ++p) 14460484Sobrien lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 14560484Sobrien#endif 14660484Sobrien 14733965Sjdp /* The m68k backend wants to be able to change comment_chars. */ 14833965Sjdp#ifndef tc_comment_chars 14933965Sjdp#define tc_comment_chars comment_chars 15033965Sjdp#endif 15133965Sjdp for (p = tc_comment_chars; *p; p++) 15233965Sjdp { 15333965Sjdp lex[(unsigned char) *p] = LEX_IS_COMMENT_START; 15433965Sjdp } /* declare comment chars */ 15533965Sjdp 15633965Sjdp for (p = line_comment_chars; *p; p++) 15733965Sjdp { 15833965Sjdp lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; 15933965Sjdp } /* declare line comment chars */ 16033965Sjdp 16133965Sjdp for (p = line_separator_chars; *p; p++) 16233965Sjdp { 16333965Sjdp lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; 16433965Sjdp } /* declare line separators */ 16533965Sjdp 16633965Sjdp /* Only allow slash-star comments if slash is not in use. 16733965Sjdp FIXME: This isn't right. We should always permit them. */ 16833965Sjdp if (lex['/'] == 0) 16933965Sjdp { 17033965Sjdp lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; 17133965Sjdp } 17233965Sjdp 17360484Sobrien#ifdef TC_M68K 17433965Sjdp if (m68k_mri) 17533965Sjdp { 17633965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 17733965Sjdp lex[';'] = LEX_IS_COMMENT_START; 17833965Sjdp lex['*'] = LEX_IS_LINE_COMMENT_START; 17933965Sjdp /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but 18033965Sjdp then it can't be used in an expression. */ 18133965Sjdp lex['!'] = LEX_IS_LINE_COMMENT_START; 18233965Sjdp } 18360484Sobrien#endif 18438889Sjdp 18538889Sjdp#ifdef TC_V850 18638889Sjdp lex['-'] = LEX_IS_DOUBLEDASH_1ST; 18738889Sjdp#endif 18860484Sobrien#ifdef DOUBLEBAR_PARALLEL 18938889Sjdp lex['|'] = LEX_IS_DOUBLEBAR_1ST; 19038889Sjdp#endif 19160484Sobrien#ifdef TC_D30V 19260484Sobrien /* must do this is we want VLIW instruction with "->" or "<-" */ 19360484Sobrien lex['-'] = LEX_IS_SYMBOL_COMPONENT; 19460484Sobrien#endif 19533965Sjdp} /* do_scrub_begin() */ 19633965Sjdp 19733965Sjdp/* Saved state of the scrubber */ 19833965Sjdpstatic int state; 19933965Sjdpstatic int old_state; 20033965Sjdpstatic char *out_string; 20133965Sjdpstatic char out_buf[20]; 20233965Sjdpstatic int add_newlines; 20333965Sjdpstatic char *saved_input; 20433965Sjdpstatic int saved_input_len; 20560484Sobrienstatic char input_buffer[32 * 1024]; 20633965Sjdpstatic const char *mri_state; 20733965Sjdpstatic char mri_last_ch; 20833965Sjdp 20933965Sjdp/* Data structure for saving the state of app across #include's. Note that 21033965Sjdp app is called asynchronously to the parsing of the .include's, so our 21133965Sjdp state at the time .include is interpreted is completely unrelated. 21233965Sjdp That's why we have to save it all. */ 21333965Sjdp 21477298Sobrienstruct app_save { 21577298Sobrien int state; 21677298Sobrien int old_state; 21777298Sobrien char * out_string; 21877298Sobrien char out_buf[sizeof (out_buf)]; 21977298Sobrien int add_newlines; 22077298Sobrien char * saved_input; 22177298Sobrien int saved_input_len; 22260484Sobrien#ifdef TC_M68K 22377298Sobrien int scrub_m68k_mri; 22460484Sobrien#endif 22577298Sobrien const char * mri_state; 22677298Sobrien char mri_last_ch; 22760484Sobrien#if defined TC_ARM && defined OBJ_ELF 22877298Sobrien const char * symver_state; 22960484Sobrien#endif 23077298Sobrien}; 23133965Sjdp 23233965Sjdpchar * 23333965Sjdpapp_push () 23433965Sjdp{ 23533965Sjdp register struct app_save *saved; 23633965Sjdp 23733965Sjdp saved = (struct app_save *) xmalloc (sizeof (*saved)); 23833965Sjdp saved->state = state; 23933965Sjdp saved->old_state = old_state; 24033965Sjdp saved->out_string = out_string; 24133965Sjdp memcpy (saved->out_buf, out_buf, sizeof (out_buf)); 24233965Sjdp saved->add_newlines = add_newlines; 24360484Sobrien if (saved_input == NULL) 24460484Sobrien saved->saved_input = NULL; 24560484Sobrien else 24660484Sobrien { 24760484Sobrien saved->saved_input = xmalloc (saved_input_len); 24860484Sobrien memcpy (saved->saved_input, saved_input, saved_input_len); 24960484Sobrien saved->saved_input_len = saved_input_len; 25060484Sobrien } 25160484Sobrien#ifdef TC_M68K 25233965Sjdp saved->scrub_m68k_mri = scrub_m68k_mri; 25360484Sobrien#endif 25433965Sjdp saved->mri_state = mri_state; 25533965Sjdp saved->mri_last_ch = mri_last_ch; 25660484Sobrien#if defined TC_ARM && defined OBJ_ELF 25760484Sobrien saved->symver_state = symver_state; 25860484Sobrien#endif 25933965Sjdp 26077298Sobrien /* do_scrub_begin() is not useful, just wastes time. */ 26133965Sjdp 26233965Sjdp state = 0; 26333965Sjdp saved_input = NULL; 26433965Sjdp 26533965Sjdp return (char *) saved; 26633965Sjdp} 26733965Sjdp 26877298Sobrienvoid 26933965Sjdpapp_pop (arg) 27033965Sjdp char *arg; 27133965Sjdp{ 27233965Sjdp register struct app_save *saved = (struct app_save *) arg; 27333965Sjdp 27477298Sobrien /* There is no do_scrub_end (). */ 27533965Sjdp state = saved->state; 27633965Sjdp old_state = saved->old_state; 27733965Sjdp out_string = saved->out_string; 27833965Sjdp memcpy (out_buf, saved->out_buf, sizeof (out_buf)); 27933965Sjdp add_newlines = saved->add_newlines; 28060484Sobrien if (saved->saved_input == NULL) 28160484Sobrien saved_input = NULL; 28260484Sobrien else 28360484Sobrien { 28460484Sobrien assert (saved->saved_input_len <= (int) (sizeof input_buffer)); 28560484Sobrien memcpy (input_buffer, saved->saved_input, saved->saved_input_len); 28660484Sobrien saved_input = input_buffer; 28760484Sobrien saved_input_len = saved->saved_input_len; 28860484Sobrien free (saved->saved_input); 28960484Sobrien } 29060484Sobrien#ifdef TC_M68K 29133965Sjdp scrub_m68k_mri = saved->scrub_m68k_mri; 29260484Sobrien#endif 29333965Sjdp mri_state = saved->mri_state; 29433965Sjdp mri_last_ch = saved->mri_last_ch; 29560484Sobrien#if defined TC_ARM && defined OBJ_ELF 29660484Sobrien symver_state = saved->symver_state; 29760484Sobrien#endif 29833965Sjdp 29933965Sjdp free (arg); 30033965Sjdp} /* app_pop() */ 30133965Sjdp 30233965Sjdp/* @@ This assumes that \n &c are the same on host and target. This is not 30333965Sjdp necessarily true. */ 30477298Sobrienstatic int 30533965Sjdpprocess_escape (ch) 30633965Sjdp int ch; 30733965Sjdp{ 30833965Sjdp switch (ch) 30933965Sjdp { 31033965Sjdp case 'b': 31133965Sjdp return '\b'; 31233965Sjdp case 'f': 31333965Sjdp return '\f'; 31433965Sjdp case 'n': 31533965Sjdp return '\n'; 31633965Sjdp case 'r': 31733965Sjdp return '\r'; 31833965Sjdp case 't': 31933965Sjdp return '\t'; 32033965Sjdp case '\'': 32133965Sjdp return '\''; 32233965Sjdp case '"': 32333965Sjdp return '\"'; 32433965Sjdp default: 32533965Sjdp return ch; 32633965Sjdp } 32733965Sjdp} 32833965Sjdp 32933965Sjdp/* This function is called to process input characters. The GET 33033965Sjdp parameter is used to retrieve more input characters. GET should 33133965Sjdp set its parameter to point to a buffer, and return the length of 33233965Sjdp the buffer; it should return 0 at end of file. The scrubbed output 33333965Sjdp characters are put into the buffer starting at TOSTART; the TOSTART 33433965Sjdp buffer is TOLEN bytes in length. The function returns the number 33533965Sjdp of scrubbed characters put into TOSTART. This will be TOLEN unless 33633965Sjdp end of file was seen. This function is arranged as a state 33733965Sjdp machine, and saves its state so that it may return at any point. 33833965Sjdp This is the way the old code used to work. */ 33933965Sjdp 34033965Sjdpint 34133965Sjdpdo_scrub_chars (get, tostart, tolen) 34260484Sobrien int (*get) PARAMS ((char *, int)); 34333965Sjdp char *tostart; 34433965Sjdp int tolen; 34533965Sjdp{ 34633965Sjdp char *to = tostart; 34733965Sjdp char *toend = tostart + tolen; 34833965Sjdp char *from; 34933965Sjdp char *fromend; 35033965Sjdp int fromlen; 35133965Sjdp register int ch, ch2 = 0; 35233965Sjdp 35333965Sjdp /*State 0: beginning of normal line 35433965Sjdp 1: After first whitespace on line (flush more white) 35533965Sjdp 2: After first non-white (opcode) on line (keep 1white) 35633965Sjdp 3: after second white on line (into operands) (flush white) 35733965Sjdp 4: after putting out a .line, put out digits 35833965Sjdp 5: parsing a string, then go to old-state 35933965Sjdp 6: putting out \ escape in a "d string. 36033965Sjdp 7: After putting out a .appfile, put out string. 36133965Sjdp 8: After putting out a .appfile string, flush until newline. 36233965Sjdp 9: After seeing symbol char in state 3 (keep 1white after symchar) 36333965Sjdp 10: After seeing whitespace in state 9 (keep white before symchar) 36433965Sjdp 11: After seeing a symbol character in state 0 (eg a label definition) 36533965Sjdp -1: output string in out_string and go to the state in old_state 36633965Sjdp -2: flush text until a '*' '/' is seen, then go to state old_state 36738889Sjdp#ifdef TC_V850 36838889Sjdp 12: After seeing a dash, looking for a second dash as a start of comment. 36938889Sjdp#endif 37060484Sobrien#ifdef DOUBLEBAR_PARALLEL 37138889Sjdp 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator. 37238889Sjdp#endif 37333965Sjdp */ 37433965Sjdp 37533965Sjdp /* I added states 9 and 10 because the MIPS ECOFF assembler uses 37633965Sjdp constructs like ``.loc 1 20''. This was turning into ``.loc 37733965Sjdp 120''. States 9 and 10 ensure that a space is never dropped in 37833965Sjdp between characters which could appear in a identifier. Ian 37933965Sjdp Taylor, ian@cygnus.com. 38033965Sjdp 38133965Sjdp I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works 38233965Sjdp correctly on the PA (and any other target where colons are optional). 38338889Sjdp Jeff Law, law@cs.utah.edu. 38433965Sjdp 38538889Sjdp I added state 13 so that something like "cmp r1, r2 || trap #1" does not 38638889Sjdp get squashed into "cmp r1,r2||trap#1", with the all important space 38738889Sjdp between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */ 38838889Sjdp 38933965Sjdp /* This macro gets the next input character. */ 39033965Sjdp 39160484Sobrien#define GET() \ 39260484Sobrien (from < fromend \ 39360484Sobrien ? * (unsigned char *) (from++) \ 39460484Sobrien : (saved_input = NULL, \ 39560484Sobrien fromlen = (*get) (input_buffer, sizeof input_buffer), \ 39660484Sobrien from = input_buffer, \ 39760484Sobrien fromend = from + fromlen, \ 39860484Sobrien (fromlen == 0 \ 39960484Sobrien ? EOF \ 40060484Sobrien : * (unsigned char *) (from++)))) 40133965Sjdp 40233965Sjdp /* This macro pushes a character back on the input stream. */ 40333965Sjdp 40433965Sjdp#define UNGET(uch) (*--from = (uch)) 40533965Sjdp 40633965Sjdp /* This macro puts a character into the output buffer. If this 40733965Sjdp character fills the output buffer, this macro jumps to the label 40833965Sjdp TOFULL. We use this rather ugly approach because we need to 40933965Sjdp handle two different termination conditions: EOF on the input 41033965Sjdp stream, and a full output buffer. It would be simpler if we 41133965Sjdp always read in the entire input stream before processing it, but 41233965Sjdp I don't want to make such a significant change to the assembler's 41333965Sjdp memory usage. */ 41433965Sjdp 41533965Sjdp#define PUT(pch) \ 41633965Sjdp do \ 41733965Sjdp { \ 41833965Sjdp *to++ = (pch); \ 41933965Sjdp if (to >= toend) \ 42033965Sjdp goto tofull; \ 42133965Sjdp } \ 42233965Sjdp while (0) 42333965Sjdp 42433965Sjdp if (saved_input != NULL) 42533965Sjdp { 42633965Sjdp from = saved_input; 42733965Sjdp fromend = from + saved_input_len; 42833965Sjdp } 42933965Sjdp else 43033965Sjdp { 43160484Sobrien fromlen = (*get) (input_buffer, sizeof input_buffer); 43233965Sjdp if (fromlen == 0) 43333965Sjdp return 0; 43460484Sobrien from = input_buffer; 43533965Sjdp fromend = from + fromlen; 43633965Sjdp } 43733965Sjdp 43833965Sjdp while (1) 43933965Sjdp { 44033965Sjdp /* The cases in this switch end with continue, in order to 44133965Sjdp branch back to the top of this while loop and generate the 44233965Sjdp next output character in the appropriate state. */ 44333965Sjdp switch (state) 44433965Sjdp { 44533965Sjdp case -1: 44633965Sjdp ch = *out_string++; 44733965Sjdp if (*out_string == '\0') 44833965Sjdp { 44933965Sjdp state = old_state; 45033965Sjdp old_state = 3; 45133965Sjdp } 45233965Sjdp PUT (ch); 45333965Sjdp continue; 45433965Sjdp 45533965Sjdp case -2: 45633965Sjdp for (;;) 45733965Sjdp { 45833965Sjdp do 45933965Sjdp { 46033965Sjdp ch = GET (); 46133965Sjdp 46233965Sjdp if (ch == EOF) 46333965Sjdp { 46460484Sobrien as_warn (_("end of file in comment")); 46533965Sjdp goto fromeof; 46633965Sjdp } 46733965Sjdp 46833965Sjdp if (ch == '\n') 46933965Sjdp PUT ('\n'); 47033965Sjdp } 47133965Sjdp while (ch != '*'); 47233965Sjdp 47333965Sjdp while ((ch = GET ()) == '*') 47433965Sjdp ; 47533965Sjdp 47633965Sjdp if (ch == EOF) 47733965Sjdp { 47860484Sobrien as_warn (_("end of file in comment")); 47933965Sjdp goto fromeof; 48033965Sjdp } 48133965Sjdp 48233965Sjdp if (ch == '/') 48333965Sjdp break; 48433965Sjdp 48533965Sjdp UNGET (ch); 48633965Sjdp } 48733965Sjdp 48833965Sjdp state = old_state; 48933965Sjdp UNGET (' '); 49033965Sjdp continue; 49133965Sjdp 49233965Sjdp case 4: 49333965Sjdp ch = GET (); 49433965Sjdp if (ch == EOF) 49533965Sjdp goto fromeof; 49633965Sjdp else if (ch >= '0' && ch <= '9') 49733965Sjdp PUT (ch); 49833965Sjdp else 49933965Sjdp { 50033965Sjdp while (ch != EOF && IS_WHITESPACE (ch)) 50133965Sjdp ch = GET (); 50233965Sjdp if (ch == '"') 50333965Sjdp { 50433965Sjdp UNGET (ch); 50533965Sjdp if (scrub_m68k_mri) 50633965Sjdp out_string = "\n\tappfile "; 50733965Sjdp else 50833965Sjdp out_string = "\n\t.appfile "; 50933965Sjdp old_state = 7; 51033965Sjdp state = -1; 51133965Sjdp PUT (*out_string++); 51233965Sjdp } 51333965Sjdp else 51433965Sjdp { 51533965Sjdp while (ch != EOF && ch != '\n') 51633965Sjdp ch = GET (); 51733965Sjdp state = 0; 51833965Sjdp PUT (ch); 51933965Sjdp } 52033965Sjdp } 52133965Sjdp continue; 52233965Sjdp 52333965Sjdp case 5: 52433965Sjdp /* We are going to copy everything up to a quote character, 52533965Sjdp with special handling for a backslash. We try to 52633965Sjdp optimize the copying in the simple case without using the 52733965Sjdp GET and PUT macros. */ 52833965Sjdp { 52933965Sjdp char *s; 53033965Sjdp int len; 53133965Sjdp 53233965Sjdp for (s = from; s < fromend; s++) 53333965Sjdp { 53433965Sjdp ch = *s; 53533965Sjdp /* This condition must be changed if the type of any 53633965Sjdp other character can be LEX_IS_STRINGQUOTE. */ 53733965Sjdp if (ch == '\\' 53833965Sjdp || ch == '"' 53933965Sjdp || ch == '\'' 54033965Sjdp || ch == '\n') 54133965Sjdp break; 54233965Sjdp } 54333965Sjdp len = s - from; 54433965Sjdp if (len > toend - to) 54533965Sjdp len = toend - to; 54633965Sjdp if (len > 0) 54733965Sjdp { 54833965Sjdp memcpy (to, from, len); 54933965Sjdp to += len; 55033965Sjdp from += len; 55133965Sjdp } 55233965Sjdp } 55333965Sjdp 55433965Sjdp ch = GET (); 55533965Sjdp if (ch == EOF) 55633965Sjdp { 55760484Sobrien as_warn (_("end of file in string: inserted '\"'")); 55833965Sjdp state = old_state; 55933965Sjdp UNGET ('\n'); 56033965Sjdp PUT ('"'); 56133965Sjdp } 56233965Sjdp else if (lex[ch] == LEX_IS_STRINGQUOTE) 56333965Sjdp { 56433965Sjdp state = old_state; 56533965Sjdp PUT (ch); 56633965Sjdp } 56733965Sjdp#ifndef NO_STRING_ESCAPES 56833965Sjdp else if (ch == '\\') 56933965Sjdp { 57033965Sjdp state = 6; 57133965Sjdp PUT (ch); 57233965Sjdp } 57333965Sjdp#endif 57433965Sjdp else if (scrub_m68k_mri && ch == '\n') 57533965Sjdp { 57633965Sjdp /* Just quietly terminate the string. This permits lines like 57733965Sjdp bne label loop if we haven't reach end yet 57833965Sjdp */ 57933965Sjdp state = old_state; 58033965Sjdp UNGET (ch); 58133965Sjdp PUT ('\''); 58233965Sjdp } 58333965Sjdp else 58433965Sjdp { 58533965Sjdp PUT (ch); 58633965Sjdp } 58733965Sjdp continue; 58833965Sjdp 58933965Sjdp case 6: 59033965Sjdp state = 5; 59133965Sjdp ch = GET (); 59233965Sjdp switch (ch) 59333965Sjdp { 59433965Sjdp /* Handle strings broken across lines, by turning '\n' into 59533965Sjdp '\\' and 'n'. */ 59633965Sjdp case '\n': 59733965Sjdp UNGET ('n'); 59833965Sjdp add_newlines++; 59933965Sjdp PUT ('\\'); 60033965Sjdp continue; 60133965Sjdp 60233965Sjdp case '"': 60333965Sjdp case '\\': 60433965Sjdp case 'b': 60533965Sjdp case 'f': 60633965Sjdp case 'n': 60733965Sjdp case 'r': 60833965Sjdp case 't': 60933965Sjdp case 'v': 61033965Sjdp case 'x': 61133965Sjdp case 'X': 61233965Sjdp case '0': 61333965Sjdp case '1': 61433965Sjdp case '2': 61533965Sjdp case '3': 61633965Sjdp case '4': 61733965Sjdp case '5': 61833965Sjdp case '6': 61933965Sjdp case '7': 62033965Sjdp break; 62133965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) 62233965Sjdp default: 62360484Sobrien as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch); 62433965Sjdp break; 62533965Sjdp#else /* ONLY_STANDARD_ESCAPES */ 62633965Sjdp default: 62733965Sjdp /* Accept \x as x for any x */ 62833965Sjdp break; 62933965Sjdp#endif /* ONLY_STANDARD_ESCAPES */ 63033965Sjdp 63133965Sjdp case EOF: 63260484Sobrien as_warn (_("End of file in string: '\"' inserted")); 63333965Sjdp PUT ('"'); 63433965Sjdp continue; 63533965Sjdp } 63633965Sjdp PUT (ch); 63733965Sjdp continue; 63833965Sjdp 63933965Sjdp case 7: 64033965Sjdp ch = GET (); 64133965Sjdp state = 5; 64233965Sjdp old_state = 8; 64333965Sjdp if (ch == EOF) 64433965Sjdp goto fromeof; 64533965Sjdp PUT (ch); 64633965Sjdp continue; 64733965Sjdp 64833965Sjdp case 8: 64933965Sjdp do 65033965Sjdp ch = GET (); 65133965Sjdp while (ch != '\n' && ch != EOF); 65233965Sjdp if (ch == EOF) 65333965Sjdp goto fromeof; 65433965Sjdp state = 0; 65533965Sjdp PUT (ch); 65633965Sjdp continue; 65733965Sjdp } 65833965Sjdp 65933965Sjdp /* OK, we are somewhere in states 0 through 4 or 9 through 11 */ 66033965Sjdp 66133965Sjdp /* flushchar: */ 66233965Sjdp ch = GET (); 66333965Sjdp 66433965Sjdp recycle: 66533965Sjdp 66660484Sobrien#if defined TC_ARM && defined OBJ_ELF 66760484Sobrien /* We need to watch out for .symver directives. See the comment later 66860484Sobrien in this function. */ 66960484Sobrien if (symver_state == NULL) 67060484Sobrien { 67160484Sobrien if ((state == 0 || state == 1) && ch == symver_pseudo[0]) 67260484Sobrien symver_state = symver_pseudo + 1; 67360484Sobrien } 67460484Sobrien else 67560484Sobrien { 67660484Sobrien /* We advance to the next state if we find the right 67760484Sobrien character. */ 67860484Sobrien if (ch != '\0' && (*symver_state == ch)) 67960484Sobrien ++symver_state; 68060484Sobrien else if (*symver_state != '\0') 68160484Sobrien /* We did not get the expected character, or we didn't 68260484Sobrien get a valid terminating character after seeing the 68360484Sobrien entire pseudo-op, so we must go back to the beginning. */ 68460484Sobrien symver_state = NULL; 68560484Sobrien else 68660484Sobrien { 68760484Sobrien /* We've read the entire pseudo-op. If this is the end 68860484Sobrien of the line, go back to the beginning. */ 68960484Sobrien if (IS_NEWLINE (ch)) 69060484Sobrien symver_state = NULL; 69160484Sobrien } 69260484Sobrien } 69360484Sobrien#endif /* TC_ARM && OBJ_ELF */ 69460484Sobrien 69533965Sjdp#ifdef TC_M68K 69633965Sjdp /* We want to have pseudo-ops which control whether we are in 69733965Sjdp MRI mode or not. Unfortunately, since m68k MRI mode affects 69833965Sjdp the scrubber, that means that we need a special purpose 69933965Sjdp recognizer here. */ 70033965Sjdp if (mri_state == NULL) 70133965Sjdp { 70233965Sjdp if ((state == 0 || state == 1) 70333965Sjdp && ch == mri_pseudo[0]) 70433965Sjdp mri_state = mri_pseudo + 1; 70533965Sjdp } 70633965Sjdp else 70733965Sjdp { 70833965Sjdp /* We advance to the next state if we find the right 70933965Sjdp character, or if we need a space character and we get any 71033965Sjdp whitespace character, or if we need a '0' and we get a 71133965Sjdp '1' (this is so that we only need one state to handle 71233965Sjdp ``.mri 0'' and ``.mri 1''). */ 71333965Sjdp if (ch != '\0' 71433965Sjdp && (*mri_state == ch 71533965Sjdp || (*mri_state == ' ' 71633965Sjdp && lex[ch] == LEX_IS_WHITESPACE) 71733965Sjdp || (*mri_state == '0' 71833965Sjdp && ch == '1'))) 71933965Sjdp { 72033965Sjdp mri_last_ch = ch; 72133965Sjdp ++mri_state; 72233965Sjdp } 72333965Sjdp else if (*mri_state != '\0' 72433965Sjdp || (lex[ch] != LEX_IS_WHITESPACE 72533965Sjdp && lex[ch] != LEX_IS_NEWLINE)) 72633965Sjdp { 72733965Sjdp /* We did not get the expected character, or we didn't 72833965Sjdp get a valid terminating character after seeing the 72933965Sjdp entire pseudo-op, so we must go back to the 73033965Sjdp beginning. */ 73133965Sjdp mri_state = NULL; 73233965Sjdp } 73333965Sjdp else 73433965Sjdp { 73533965Sjdp /* We've read the entire pseudo-op. mips_last_ch is 73633965Sjdp either '0' or '1' indicating whether to enter or 73733965Sjdp leave MRI mode. */ 73833965Sjdp do_scrub_begin (mri_last_ch == '1'); 73938889Sjdp mri_state = NULL; 74033965Sjdp 74133965Sjdp /* We continue handling the character as usual. The 74233965Sjdp main gas reader must also handle the .mri pseudo-op 74333965Sjdp to control expression parsing and the like. */ 74433965Sjdp } 74533965Sjdp } 74633965Sjdp#endif 74733965Sjdp 74833965Sjdp if (ch == EOF) 74933965Sjdp { 75033965Sjdp if (state != 0) 75133965Sjdp { 75260484Sobrien as_warn (_("end of file not at end of a line; newline inserted")); 75333965Sjdp state = 0; 75433965Sjdp PUT ('\n'); 75533965Sjdp } 75633965Sjdp goto fromeof; 75733965Sjdp } 75833965Sjdp 75933965Sjdp switch (lex[ch]) 76033965Sjdp { 76133965Sjdp case LEX_IS_WHITESPACE: 76233965Sjdp do 76333965Sjdp { 76433965Sjdp ch = GET (); 76533965Sjdp } 76633965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 76733965Sjdp if (ch == EOF) 76833965Sjdp goto fromeof; 76933965Sjdp 77033965Sjdp if (state == 0) 77133965Sjdp { 77233965Sjdp /* Preserve a single whitespace character at the 77333965Sjdp beginning of a line. */ 77433965Sjdp state = 1; 77533965Sjdp UNGET (ch); 77633965Sjdp PUT (' '); 77733965Sjdp break; 77833965Sjdp } 77933965Sjdp 78060484Sobrien#ifdef KEEP_WHITE_AROUND_COLON 78177298Sobrien if (lex[ch] == LEX_IS_COLON) 78277298Sobrien { 78377298Sobrien /* Only keep this white if there's no white *after* the 78477298Sobrien colon. */ 78577298Sobrien ch2 = GET (); 78677298Sobrien UNGET (ch2); 78777298Sobrien if (!IS_WHITESPACE (ch2)) 78877298Sobrien { 78977298Sobrien state = 9; 79077298Sobrien UNGET (ch); 79177298Sobrien PUT (' '); 79277298Sobrien break; 79377298Sobrien } 79477298Sobrien } 79560484Sobrien#endif 79633965Sjdp if (IS_COMMENT (ch) 79733965Sjdp || ch == '/' 79833965Sjdp || IS_LINE_SEPARATOR (ch)) 79933965Sjdp { 80033965Sjdp if (scrub_m68k_mri) 80133965Sjdp { 80233965Sjdp /* In MRI mode, we keep these spaces. */ 80333965Sjdp UNGET (ch); 80433965Sjdp PUT (' '); 80533965Sjdp break; 80633965Sjdp } 80733965Sjdp goto recycle; 80833965Sjdp } 80933965Sjdp 81033965Sjdp /* If we're in state 2 or 11, we've seen a non-white 81133965Sjdp character followed by whitespace. If the next character 81233965Sjdp is ':', this is whitespace after a label name which we 81333965Sjdp normally must ignore. In MRI mode, though, spaces are 81433965Sjdp not permitted between the label and the colon. */ 81533965Sjdp if ((state == 2 || state == 11) 81633965Sjdp && lex[ch] == LEX_IS_COLON 81733965Sjdp && ! scrub_m68k_mri) 81833965Sjdp { 81933965Sjdp state = 1; 82033965Sjdp PUT (ch); 82133965Sjdp break; 82233965Sjdp } 82333965Sjdp 82433965Sjdp switch (state) 82533965Sjdp { 82633965Sjdp case 0: 82733965Sjdp state++; 82833965Sjdp goto recycle; /* Punted leading sp */ 82933965Sjdp case 1: 83033965Sjdp /* We can arrive here if we leave a leading whitespace 83133965Sjdp character at the beginning of a line. */ 83233965Sjdp goto recycle; 83333965Sjdp case 2: 83433965Sjdp state = 3; 83533965Sjdp if (to + 1 < toend) 83633965Sjdp { 83733965Sjdp /* Optimize common case by skipping UNGET/GET. */ 83833965Sjdp PUT (' '); /* Sp after opco */ 83933965Sjdp goto recycle; 84033965Sjdp } 84133965Sjdp UNGET (ch); 84233965Sjdp PUT (' '); 84333965Sjdp break; 84433965Sjdp case 3: 84533965Sjdp if (scrub_m68k_mri) 84633965Sjdp { 84733965Sjdp /* In MRI mode, we keep these spaces. */ 84833965Sjdp UNGET (ch); 84933965Sjdp PUT (' '); 85033965Sjdp break; 85133965Sjdp } 85233965Sjdp goto recycle; /* Sp in operands */ 85333965Sjdp case 9: 85433965Sjdp case 10: 85533965Sjdp if (scrub_m68k_mri) 85633965Sjdp { 85733965Sjdp /* In MRI mode, we keep these spaces. */ 85833965Sjdp state = 3; 85933965Sjdp UNGET (ch); 86033965Sjdp PUT (' '); 86133965Sjdp break; 86233965Sjdp } 86333965Sjdp state = 10; /* Sp after symbol char */ 86433965Sjdp goto recycle; 86533965Sjdp case 11: 86660484Sobrien if (LABELS_WITHOUT_COLONS || flag_m68k_mri) 86733965Sjdp state = 1; 86833965Sjdp else 86933965Sjdp { 87033965Sjdp /* We know that ch is not ':', since we tested that 87133965Sjdp case above. Therefore this is not a label, so it 87233965Sjdp must be the opcode, and we've just seen the 87333965Sjdp whitespace after it. */ 87433965Sjdp state = 3; 87533965Sjdp } 87633965Sjdp UNGET (ch); 87733965Sjdp PUT (' '); /* Sp after label definition. */ 87833965Sjdp break; 87933965Sjdp default: 88033965Sjdp BAD_CASE (state); 88133965Sjdp } 88233965Sjdp break; 88333965Sjdp 88433965Sjdp case LEX_IS_TWOCHAR_COMMENT_1ST: 88533965Sjdp ch2 = GET (); 88633965Sjdp if (ch2 == '*') 88733965Sjdp { 88833965Sjdp for (;;) 88933965Sjdp { 89033965Sjdp do 89133965Sjdp { 89233965Sjdp ch2 = GET (); 89333965Sjdp if (ch2 != EOF && IS_NEWLINE (ch2)) 89433965Sjdp add_newlines++; 89533965Sjdp } 89633965Sjdp while (ch2 != EOF && ch2 != '*'); 89733965Sjdp 89833965Sjdp while (ch2 == '*') 89933965Sjdp ch2 = GET (); 90033965Sjdp 90133965Sjdp if (ch2 == EOF || ch2 == '/') 90233965Sjdp break; 90333965Sjdp 90433965Sjdp /* This UNGET will ensure that we count newlines 90533965Sjdp correctly. */ 90633965Sjdp UNGET (ch2); 90733965Sjdp } 90833965Sjdp 90933965Sjdp if (ch2 == EOF) 91060484Sobrien as_warn (_("end of file in multiline comment")); 91133965Sjdp 91233965Sjdp ch = ' '; 91333965Sjdp goto recycle; 91433965Sjdp } 91577298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS 91677298Sobrien else if (ch2 == '/') 91777298Sobrien { 91877298Sobrien do 91977298Sobrien { 92077298Sobrien ch = GET (); 92177298Sobrien } 92277298Sobrien while (ch != EOF && !IS_NEWLINE (ch)); 92377298Sobrien if (ch == EOF) 92477298Sobrien as_warn ("end of file in comment; newline inserted"); 92577298Sobrien state = 0; 92677298Sobrien PUT ('\n'); 92777298Sobrien break; 92877298Sobrien } 92977298Sobrien#endif 93033965Sjdp else 93133965Sjdp { 93233965Sjdp if (ch2 != EOF) 93333965Sjdp UNGET (ch2); 93433965Sjdp if (state == 9 || state == 10) 93533965Sjdp state = 3; 93633965Sjdp PUT (ch); 93733965Sjdp } 93833965Sjdp break; 93933965Sjdp 94033965Sjdp case LEX_IS_STRINGQUOTE: 94133965Sjdp if (state == 10) 94233965Sjdp { 94333965Sjdp /* Preserve the whitespace in foo "bar" */ 94433965Sjdp UNGET (ch); 94533965Sjdp state = 3; 94633965Sjdp PUT (' '); 94733965Sjdp 94833965Sjdp /* PUT didn't jump out. We could just break, but we 94933965Sjdp know what will happen, so optimize a bit. */ 95033965Sjdp ch = GET (); 95133965Sjdp old_state = 3; 95233965Sjdp } 95333965Sjdp else if (state == 9) 95433965Sjdp old_state = 3; 95533965Sjdp else 95633965Sjdp old_state = state; 95733965Sjdp state = 5; 95833965Sjdp PUT (ch); 95933965Sjdp break; 96033965Sjdp 96133965Sjdp#ifndef IEEE_STYLE 96233965Sjdp case LEX_IS_ONECHAR_QUOTE: 96333965Sjdp if (state == 10) 96433965Sjdp { 96533965Sjdp /* Preserve the whitespace in foo 'b' */ 96633965Sjdp UNGET (ch); 96733965Sjdp state = 3; 96833965Sjdp PUT (' '); 96933965Sjdp break; 97033965Sjdp } 97133965Sjdp ch = GET (); 97233965Sjdp if (ch == EOF) 97333965Sjdp { 97460484Sobrien as_warn (_("end of file after a one-character quote; \\0 inserted")); 97533965Sjdp ch = 0; 97633965Sjdp } 97733965Sjdp if (ch == '\\') 97833965Sjdp { 97933965Sjdp ch = GET (); 98033965Sjdp if (ch == EOF) 98133965Sjdp { 98260484Sobrien as_warn (_("end of file in escape character")); 98333965Sjdp ch = '\\'; 98433965Sjdp } 98533965Sjdp else 98633965Sjdp ch = process_escape (ch); 98733965Sjdp } 98833965Sjdp sprintf (out_buf, "%d", (int) (unsigned char) ch); 98933965Sjdp 99033965Sjdp /* None of these 'x constants for us. We want 'x'. */ 99133965Sjdp if ((ch = GET ()) != '\'') 99233965Sjdp { 99333965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE 99460484Sobrien as_warn (_("Missing close quote: (assumed)")); 99533965Sjdp#else 99633965Sjdp if (ch != EOF) 99733965Sjdp UNGET (ch); 99833965Sjdp#endif 99933965Sjdp } 100033965Sjdp if (strlen (out_buf) == 1) 100133965Sjdp { 100233965Sjdp PUT (out_buf[0]); 100333965Sjdp break; 100433965Sjdp } 100533965Sjdp if (state == 9) 100633965Sjdp old_state = 3; 100733965Sjdp else 100833965Sjdp old_state = state; 100933965Sjdp state = -1; 101033965Sjdp out_string = out_buf; 101133965Sjdp PUT (*out_string++); 101233965Sjdp break; 101333965Sjdp#endif 101433965Sjdp 101533965Sjdp case LEX_IS_COLON: 101660484Sobrien#ifdef KEEP_WHITE_AROUND_COLON 101777298Sobrien state = 9; 101860484Sobrien#else 101933965Sjdp if (state == 9 || state == 10) 102033965Sjdp state = 3; 102133965Sjdp else if (state != 3) 102233965Sjdp state = 1; 102360484Sobrien#endif 102433965Sjdp PUT (ch); 102533965Sjdp break; 102633965Sjdp 102733965Sjdp case LEX_IS_NEWLINE: 102833965Sjdp /* Roll out a bunch of newlines from inside comments, etc. */ 102933965Sjdp if (add_newlines) 103033965Sjdp { 103133965Sjdp --add_newlines; 103233965Sjdp UNGET (ch); 103333965Sjdp } 103477298Sobrien /* Fall through. */ 103533965Sjdp 103633965Sjdp case LEX_IS_LINE_SEPARATOR: 103733965Sjdp state = 0; 103833965Sjdp PUT (ch); 103933965Sjdp break; 104033965Sjdp 104138889Sjdp#ifdef TC_V850 104238889Sjdp case LEX_IS_DOUBLEDASH_1ST: 104377298Sobrien ch2 = GET (); 104438889Sjdp if (ch2 != '-') 104538889Sjdp { 104638889Sjdp UNGET (ch2); 104738889Sjdp goto de_fault; 104838889Sjdp } 104977298Sobrien /* Read and skip to end of line. */ 105038889Sjdp do 105138889Sjdp { 105238889Sjdp ch = GET (); 105338889Sjdp } 105438889Sjdp while (ch != EOF && ch != '\n'); 105538889Sjdp if (ch == EOF) 105638889Sjdp { 105760484Sobrien as_warn (_("end of file in comment; newline inserted")); 105838889Sjdp } 105938889Sjdp state = 0; 106038889Sjdp PUT ('\n'); 106138889Sjdp break; 106277298Sobrien#endif 106360484Sobrien#ifdef DOUBLEBAR_PARALLEL 106438889Sjdp case LEX_IS_DOUBLEBAR_1ST: 106577298Sobrien ch2 = GET (); 106638889Sjdp if (ch2 != '|') 106738889Sjdp { 106838889Sjdp UNGET (ch2); 106938889Sjdp goto de_fault; 107038889Sjdp } 107138889Sjdp /* Reset back to state 1 and pretend that we are parsing a line from 107238889Sjdp just after the first white space. */ 107338889Sjdp state = 1; 107438889Sjdp PUT ('|'); 107538889Sjdp PUT ('|'); 107638889Sjdp break; 107777298Sobrien#endif 107833965Sjdp case LEX_IS_LINE_COMMENT_START: 107933965Sjdp /* FIXME-someday: The two character comment stuff was badly 108033965Sjdp thought out. On i386, we want '/' as line comment start 108133965Sjdp AND we want C style comments. hence this hack. The 108233965Sjdp whole lexical process should be reworked. xoxorich. */ 108333965Sjdp if (ch == '/') 108433965Sjdp { 108533965Sjdp ch2 = GET (); 108633965Sjdp if (ch2 == '*') 108733965Sjdp { 108833965Sjdp old_state = 3; 108933965Sjdp state = -2; 109033965Sjdp break; 109133965Sjdp } 109233965Sjdp else 109333965Sjdp { 109433965Sjdp UNGET (ch2); 109533965Sjdp } 109633965Sjdp } /* bad hack */ 109733965Sjdp 109833965Sjdp if (state == 0 || state == 1) /* Only comment at start of line. */ 109933965Sjdp { 110033965Sjdp int startch; 110133965Sjdp 110233965Sjdp startch = ch; 110333965Sjdp 110433965Sjdp do 110533965Sjdp { 110633965Sjdp ch = GET (); 110733965Sjdp } 110833965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 110933965Sjdp if (ch == EOF) 111033965Sjdp { 111160484Sobrien as_warn (_("end of file in comment; newline inserted")); 111233965Sjdp PUT ('\n'); 111333965Sjdp break; 111433965Sjdp } 111533965Sjdp if (ch < '0' || ch > '9' || state != 0 || startch != '#') 111633965Sjdp { 111733965Sjdp /* Not a cpp line. */ 111833965Sjdp while (ch != EOF && !IS_NEWLINE (ch)) 111933965Sjdp ch = GET (); 112033965Sjdp if (ch == EOF) 112160484Sobrien as_warn (_("EOF in Comment: Newline inserted")); 112233965Sjdp state = 0; 112333965Sjdp PUT ('\n'); 112433965Sjdp break; 112533965Sjdp } 112677298Sobrien /* Looks like `# 123 "filename"' from cpp. */ 112733965Sjdp UNGET (ch); 112833965Sjdp old_state = 4; 112933965Sjdp state = -1; 113033965Sjdp if (scrub_m68k_mri) 113133965Sjdp out_string = "\tappline "; 113233965Sjdp else 113333965Sjdp out_string = "\t.appline "; 113433965Sjdp PUT (*out_string++); 113533965Sjdp break; 113633965Sjdp } 113733965Sjdp 113838889Sjdp#ifdef TC_D10V 113938889Sjdp /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true. 114038889Sjdp Trap is the only short insn that has a first operand that is 114138889Sjdp neither register nor label. 114238889Sjdp We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 . 114377298Sobrien We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is 114477298Sobrien already LEX_IS_LINE_COMMENT_START. However, it is the 114577298Sobrien only character in line_comment_chars for d10v, hence we 114677298Sobrien can recognize it as such. */ 114738889Sjdp /* An alternative approach would be to reset the state to 1 when 114838889Sjdp we see '||', '<'- or '->', but that seems to be overkill. */ 114977298Sobrien if (state == 10) 115077298Sobrien PUT (' '); 115138889Sjdp#endif 115233965Sjdp /* We have a line comment character which is not at the 115333965Sjdp start of a line. If this is also a normal comment 115433965Sjdp character, fall through. Otherwise treat it as a default 115533965Sjdp character. */ 115633965Sjdp if (strchr (tc_comment_chars, ch) == NULL 115733965Sjdp && (! scrub_m68k_mri 115833965Sjdp || (ch != '!' && ch != '*'))) 115933965Sjdp goto de_fault; 116033965Sjdp if (scrub_m68k_mri 116133965Sjdp && (ch == '!' || ch == '*' || ch == '#') 116233965Sjdp && state != 1 116333965Sjdp && state != 10) 116433965Sjdp goto de_fault; 116533965Sjdp /* Fall through. */ 116633965Sjdp case LEX_IS_COMMENT_START: 116760484Sobrien#if defined TC_ARM && defined OBJ_ELF 116860484Sobrien /* On the ARM, `@' is the comment character. 116960484Sobrien Unfortunately this is also a special character in ELF .symver 117077298Sobrien directives (and .type, though we deal with those another way). 117177298Sobrien So we check if this line is such a directive, and treat 117277298Sobrien the character as default if so. This is a hack. */ 117360484Sobrien if ((symver_state != NULL) && (*symver_state == 0)) 117460484Sobrien goto de_fault; 117560484Sobrien#endif 117677298Sobrien#ifdef WARN_COMMENTS 117777298Sobrien if (!found_comment) 117877298Sobrien as_where (&found_comment_file, &found_comment); 117977298Sobrien#endif 118033965Sjdp do 118133965Sjdp { 118233965Sjdp ch = GET (); 118333965Sjdp } 118433965Sjdp while (ch != EOF && !IS_NEWLINE (ch)); 118533965Sjdp if (ch == EOF) 118660484Sobrien as_warn (_("end of file in comment; newline inserted")); 118733965Sjdp state = 0; 118833965Sjdp PUT ('\n'); 118933965Sjdp break; 119033965Sjdp 119133965Sjdp case LEX_IS_SYMBOL_COMPONENT: 119233965Sjdp if (state == 10) 119333965Sjdp { 119433965Sjdp /* This is a symbol character following another symbol 119533965Sjdp character, with whitespace in between. We skipped 119633965Sjdp the whitespace earlier, so output it now. */ 119733965Sjdp UNGET (ch); 119833965Sjdp state = 3; 119933965Sjdp PUT (' '); 120033965Sjdp break; 120133965Sjdp } 120233965Sjdp 120333965Sjdp if (state == 3) 120433965Sjdp state = 9; 120533965Sjdp 120633965Sjdp /* This is a common case. Quickly copy CH and all the 120733965Sjdp following symbol component or normal characters. */ 120860484Sobrien if (to + 1 < toend 120960484Sobrien && mri_state == NULL 121060484Sobrien#if defined TC_ARM && defined OBJ_ELF 121160484Sobrien && symver_state == NULL 121260484Sobrien#endif 121360484Sobrien ) 121433965Sjdp { 121533965Sjdp char *s; 121633965Sjdp int len; 121733965Sjdp 121833965Sjdp for (s = from; s < fromend; s++) 121933965Sjdp { 122033965Sjdp int type; 122133965Sjdp 122277298Sobrien ch2 = *(unsigned char *) s; 122333965Sjdp type = lex[ch2]; 122433965Sjdp if (type != 0 122533965Sjdp && type != LEX_IS_SYMBOL_COMPONENT) 122633965Sjdp break; 122733965Sjdp } 122833965Sjdp if (s > from) 122933965Sjdp { 123033965Sjdp /* Handle the last character normally, for 123133965Sjdp simplicity. */ 123233965Sjdp --s; 123333965Sjdp } 123433965Sjdp len = s - from; 123533965Sjdp if (len > (toend - to) - 1) 123633965Sjdp len = (toend - to) - 1; 123733965Sjdp if (len > 0) 123833965Sjdp { 123933965Sjdp PUT (ch); 124033965Sjdp if (len > 8) 124133965Sjdp { 124233965Sjdp memcpy (to, from, len); 124333965Sjdp to += len; 124433965Sjdp from += len; 124533965Sjdp } 124633965Sjdp else 124733965Sjdp { 124833965Sjdp switch (len) 124933965Sjdp { 125033965Sjdp case 8: *to++ = *from++; 125133965Sjdp case 7: *to++ = *from++; 125233965Sjdp case 6: *to++ = *from++; 125333965Sjdp case 5: *to++ = *from++; 125433965Sjdp case 4: *to++ = *from++; 125533965Sjdp case 3: *to++ = *from++; 125633965Sjdp case 2: *to++ = *from++; 125733965Sjdp case 1: *to++ = *from++; 125833965Sjdp } 125977298Sobrien } 126033965Sjdp ch = GET (); 126133965Sjdp } 126233965Sjdp } 126333965Sjdp 126433965Sjdp /* Fall through. */ 126533965Sjdp default: 126633965Sjdp de_fault: 126733965Sjdp /* Some relatively `normal' character. */ 126833965Sjdp if (state == 0) 126933965Sjdp { 127033965Sjdp state = 11; /* Now seeing label definition */ 127133965Sjdp } 127233965Sjdp else if (state == 1) 127333965Sjdp { 127433965Sjdp state = 2; /* Ditto */ 127533965Sjdp } 127633965Sjdp else if (state == 9) 127733965Sjdp { 127833965Sjdp if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) 127933965Sjdp state = 3; 128033965Sjdp } 128133965Sjdp else if (state == 10) 128233965Sjdp { 128360484Sobrien if (ch == '\\') 128460484Sobrien { 128560484Sobrien /* Special handling for backslash: a backslash may 128660484Sobrien be the beginning of a formal parameter (of a 128760484Sobrien macro) following another symbol character, with 128860484Sobrien whitespace in between. If that is the case, we 128960484Sobrien output a space before the parameter. Strictly 129060484Sobrien speaking, correct handling depends upon what the 129160484Sobrien macro parameter expands into; if the parameter 129260484Sobrien expands into something which does not start with 129360484Sobrien an operand character, then we don't want to keep 129460484Sobrien the space. We don't have enough information to 129560484Sobrien make the right choice, so here we are making the 129660484Sobrien choice which is more likely to be correct. */ 129760484Sobrien PUT (' '); 129860484Sobrien } 129960484Sobrien 130033965Sjdp state = 3; 130133965Sjdp } 130233965Sjdp PUT (ch); 130333965Sjdp break; 130433965Sjdp } 130533965Sjdp } 130633965Sjdp 130733965Sjdp /*NOTREACHED*/ 130833965Sjdp 130933965Sjdp fromeof: 131033965Sjdp /* We have reached the end of the input. */ 131133965Sjdp return to - tostart; 131233965Sjdp 131333965Sjdp tofull: 131433965Sjdp /* The output buffer is full. Save any input we have not yet 131533965Sjdp processed. */ 131633965Sjdp if (fromend > from) 131733965Sjdp { 131860484Sobrien saved_input = from; 131933965Sjdp saved_input_len = fromend - from; 132033965Sjdp } 132133965Sjdp else 132260484Sobrien saved_input = NULL; 132360484Sobrien 132433965Sjdp return to - tostart; 132533965Sjdp} 132633965Sjdp 132733965Sjdp/* end of app.c */ 1328