/* app.c revision 33965 */
133965Sjdp/* This is the Assembler Pre-Processor 233965Sjdp Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 1997 333965Sjdp Free Software Foundation, Inc. 433965Sjdp 533965Sjdp This file is part of GAS, the GNU Assembler. 633965Sjdp 733965Sjdp GAS is free software; you can redistribute it and/or modify 833965Sjdp it under the terms of the GNU General Public License as published by 933965Sjdp the Free Software Foundation; either version 2, or (at your option) 1033965Sjdp any later version. 1133965Sjdp 1233965Sjdp GAS is distributed in the hope that it will be useful, 1333965Sjdp but WITHOUT ANY WARRANTY; without even the implied warranty of 1433965Sjdp MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1533965Sjdp GNU General Public License for more details. 1633965Sjdp 1733965Sjdp You should have received a copy of the GNU General Public License 1833965Sjdp along with GAS; see the file COPYING. If not, write to the Free 1933965Sjdp Software Foundation, 59 Temple Place - Suite 330, Boston, MA 2033965Sjdp 02111-1307, USA. */ 2133965Sjdp 2233965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ 2333965Sjdp/* App, the assembler pre-processor. This pre-processor strips out excess 2433965Sjdp spaces, turns single-quoted characters into a decimal constant, and turns 2533965Sjdp # <number> <filename> <garbage> into a .line <number>\n.file <filename> 2633965Sjdp pair. This needs better error-handling. */ 2733965Sjdp 2833965Sjdp#include <stdio.h> 2933965Sjdp#include "as.h" /* For BAD_CASE() only */ 3033965Sjdp 3133965Sjdp#if (__STDC__ != 1) 3233965Sjdp#ifndef const 3333965Sjdp#define const /* empty */ 3433965Sjdp#endif 3533965Sjdp#endif 3633965Sjdp 3733965Sjdp/* Whether we are scrubbing in m68k MRI mode. This is different from 3833965Sjdp flag_m68k_mri, because the two flags will be affected by the .mri 3933965Sjdp pseudo-op at different times. 
*/ 4033965Sjdpstatic int scrub_m68k_mri; 4133965Sjdp 4233965Sjdp/* The pseudo-op which switches in and out of MRI mode. See the 4333965Sjdp comment in do_scrub_chars. */ 4433965Sjdpstatic const char mri_pseudo[] = ".mri 0"; 4533965Sjdp 4633965Sjdpstatic char lex[256]; 4733965Sjdpstatic const char symbol_chars[] = 4833965Sjdp"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; 4933965Sjdp 5033965Sjdp#define LEX_IS_SYMBOL_COMPONENT 1 5133965Sjdp#define LEX_IS_WHITESPACE 2 5233965Sjdp#define LEX_IS_LINE_SEPARATOR 3 5333965Sjdp#define LEX_IS_COMMENT_START 4 5433965Sjdp#define LEX_IS_LINE_COMMENT_START 5 5533965Sjdp#define LEX_IS_TWOCHAR_COMMENT_1ST 6 5633965Sjdp#define LEX_IS_STRINGQUOTE 8 5733965Sjdp#define LEX_IS_COLON 9 5833965Sjdp#define LEX_IS_NEWLINE 10 5933965Sjdp#define LEX_IS_ONECHAR_QUOTE 11 6033965Sjdp#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) 6133965Sjdp#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE) 6233965Sjdp#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR) 6333965Sjdp#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START) 6433965Sjdp#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START) 6533965Sjdp#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE) 6633965Sjdp 6733965Sjdpstatic int process_escape PARAMS ((int)); 6833965Sjdp 6933965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be 7033965Sjdp built statically at compile time rather than dynamically 7133965Sjdp each and every time the assembler is run. xoxorich. */ 7233965Sjdp 7333965Sjdpvoid 7433965Sjdpdo_scrub_begin (m68k_mri) 7533965Sjdp int m68k_mri; 7633965Sjdp{ 7733965Sjdp const char *p; 7833965Sjdp 7933965Sjdp scrub_m68k_mri = m68k_mri; 8033965Sjdp 8133965Sjdp lex[' '] = LEX_IS_WHITESPACE; 8233965Sjdp lex['\t'] = LEX_IS_WHITESPACE; 8333965Sjdp lex['\n'] = LEX_IS_NEWLINE; 8433965Sjdp lex[';'] = LEX_IS_LINE_SEPARATOR; 8533965Sjdp lex[':'] = LEX_IS_COLON; 8633965Sjdp 8733965Sjdp if (! 
m68k_mri) 8833965Sjdp { 8933965Sjdp lex['"'] = LEX_IS_STRINGQUOTE; 9033965Sjdp 9133965Sjdp#ifndef TC_HPPA 9233965Sjdp lex['\''] = LEX_IS_ONECHAR_QUOTE; 9333965Sjdp#endif 9433965Sjdp 9533965Sjdp#ifdef SINGLE_QUOTE_STRINGS 9633965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 9733965Sjdp#endif 9833965Sjdp } 9933965Sjdp 10033965Sjdp /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop 10133965Sjdp in state 5 of do_scrub_chars must be changed. */ 10233965Sjdp 10333965Sjdp /* Note that these override the previous defaults, e.g. if ';' is a 10433965Sjdp comment char, then it isn't a line separator. */ 10533965Sjdp for (p = symbol_chars; *p; ++p) 10633965Sjdp { 10733965Sjdp lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; 10833965Sjdp } /* declare symbol characters */ 10933965Sjdp 11033965Sjdp /* The m68k backend wants to be able to change comment_chars. */ 11133965Sjdp#ifndef tc_comment_chars 11233965Sjdp#define tc_comment_chars comment_chars 11333965Sjdp#endif 11433965Sjdp for (p = tc_comment_chars; *p; p++) 11533965Sjdp { 11633965Sjdp lex[(unsigned char) *p] = LEX_IS_COMMENT_START; 11733965Sjdp } /* declare comment chars */ 11833965Sjdp 11933965Sjdp for (p = line_comment_chars; *p; p++) 12033965Sjdp { 12133965Sjdp lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; 12233965Sjdp } /* declare line comment chars */ 12333965Sjdp 12433965Sjdp for (p = line_separator_chars; *p; p++) 12533965Sjdp { 12633965Sjdp lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; 12733965Sjdp } /* declare line separators */ 12833965Sjdp 12933965Sjdp /* Only allow slash-star comments if slash is not in use. 13033965Sjdp FIXME: This isn't right. We should always permit them. 
*/ 13133965Sjdp if (lex['/'] == 0) 13233965Sjdp { 13333965Sjdp lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; 13433965Sjdp } 13533965Sjdp 13633965Sjdp if (m68k_mri) 13733965Sjdp { 13833965Sjdp lex['\''] = LEX_IS_STRINGQUOTE; 13933965Sjdp lex[';'] = LEX_IS_COMMENT_START; 14033965Sjdp lex['*'] = LEX_IS_LINE_COMMENT_START; 14133965Sjdp /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but 14233965Sjdp then it can't be used in an expression. */ 14333965Sjdp lex['!'] = LEX_IS_LINE_COMMENT_START; 14433965Sjdp } 14533965Sjdp} /* do_scrub_begin() */ 14633965Sjdp 14733965Sjdp/* Saved state of the scrubber */ 14833965Sjdpstatic int state; 14933965Sjdpstatic int old_state; 15033965Sjdpstatic char *out_string; 15133965Sjdpstatic char out_buf[20]; 15233965Sjdpstatic int add_newlines; 15333965Sjdpstatic char *saved_input; 15433965Sjdpstatic int saved_input_len; 15533965Sjdpstatic const char *mri_state; 15633965Sjdpstatic char mri_last_ch; 15733965Sjdp 15833965Sjdp/* Data structure for saving the state of app across #include's. Note that 15933965Sjdp app is called asynchronously to the parsing of the .include's, so our 16033965Sjdp state at the time .include is interpreted is completely unrelated. 16133965Sjdp That's why we have to save it all. 
*/ 16233965Sjdp 16333965Sjdpstruct app_save 16433965Sjdp { 16533965Sjdp int state; 16633965Sjdp int old_state; 16733965Sjdp char *out_string; 16833965Sjdp char out_buf[sizeof (out_buf)]; 16933965Sjdp int add_newlines; 17033965Sjdp char *saved_input; 17133965Sjdp int saved_input_len; 17233965Sjdp int scrub_m68k_mri; 17333965Sjdp const char *mri_state; 17433965Sjdp char mri_last_ch; 17533965Sjdp }; 17633965Sjdp 17733965Sjdpchar * 17833965Sjdpapp_push () 17933965Sjdp{ 18033965Sjdp register struct app_save *saved; 18133965Sjdp 18233965Sjdp saved = (struct app_save *) xmalloc (sizeof (*saved)); 18333965Sjdp saved->state = state; 18433965Sjdp saved->old_state = old_state; 18533965Sjdp saved->out_string = out_string; 18633965Sjdp memcpy (saved->out_buf, out_buf, sizeof (out_buf)); 18733965Sjdp saved->add_newlines = add_newlines; 18833965Sjdp saved->saved_input = saved_input; 18933965Sjdp saved->saved_input_len = saved_input_len; 19033965Sjdp saved->scrub_m68k_mri = scrub_m68k_mri; 19133965Sjdp saved->mri_state = mri_state; 19233965Sjdp saved->mri_last_ch = mri_last_ch; 19333965Sjdp 19433965Sjdp /* do_scrub_begin() is not useful, just wastes time. */ 19533965Sjdp 19633965Sjdp state = 0; 19733965Sjdp saved_input = NULL; 19833965Sjdp 19933965Sjdp return (char *) saved; 20033965Sjdp} 20133965Sjdp 20233965Sjdpvoid 20333965Sjdpapp_pop (arg) 20433965Sjdp char *arg; 20533965Sjdp{ 20633965Sjdp register struct app_save *saved = (struct app_save *) arg; 20733965Sjdp 20833965Sjdp /* There is no do_scrub_end (). 
*/ 20933965Sjdp state = saved->state; 21033965Sjdp old_state = saved->old_state; 21133965Sjdp out_string = saved->out_string; 21233965Sjdp memcpy (out_buf, saved->out_buf, sizeof (out_buf)); 21333965Sjdp add_newlines = saved->add_newlines; 21433965Sjdp saved_input = saved->saved_input; 21533965Sjdp saved_input_len = saved->saved_input_len; 21633965Sjdp scrub_m68k_mri = saved->scrub_m68k_mri; 21733965Sjdp mri_state = saved->mri_state; 21833965Sjdp mri_last_ch = saved->mri_last_ch; 21933965Sjdp 22033965Sjdp free (arg); 22133965Sjdp} /* app_pop() */ 22233965Sjdp 22333965Sjdp/* @@ This assumes that \n &c are the same on host and target. This is not 22433965Sjdp necessarily true. */ 22533965Sjdpstatic int 22633965Sjdpprocess_escape (ch) 22733965Sjdp int ch; 22833965Sjdp{ 22933965Sjdp switch (ch) 23033965Sjdp { 23133965Sjdp case 'b': 23233965Sjdp return '\b'; 23333965Sjdp case 'f': 23433965Sjdp return '\f'; 23533965Sjdp case 'n': 23633965Sjdp return '\n'; 23733965Sjdp case 'r': 23833965Sjdp return '\r'; 23933965Sjdp case 't': 24033965Sjdp return '\t'; 24133965Sjdp case '\'': 24233965Sjdp return '\''; 24333965Sjdp case '"': 24433965Sjdp return '\"'; 24533965Sjdp default: 24633965Sjdp return ch; 24733965Sjdp } 24833965Sjdp} 24933965Sjdp 25033965Sjdp/* This function is called to process input characters. The GET 25133965Sjdp parameter is used to retrieve more input characters. GET should 25233965Sjdp set its parameter to point to a buffer, and return the length of 25333965Sjdp the buffer; it should return 0 at end of file. The scrubbed output 25433965Sjdp characters are put into the buffer starting at TOSTART; the TOSTART 25533965Sjdp buffer is TOLEN bytes in length. The function returns the number 25633965Sjdp of scrubbed characters put into TOSTART. This will be TOLEN unless 25733965Sjdp end of file was seen. This function is arranged as a state 25833965Sjdp machine, and saves its state so that it may return at any point. 
25933965Sjdp This is the way the old code used to work. */ 26033965Sjdp 26133965Sjdpint 26233965Sjdpdo_scrub_chars (get, tostart, tolen) 26333965Sjdp int (*get) PARAMS ((char **)); 26433965Sjdp char *tostart; 26533965Sjdp int tolen; 26633965Sjdp{ 26733965Sjdp char *to = tostart; 26833965Sjdp char *toend = tostart + tolen; 26933965Sjdp char *from; 27033965Sjdp char *fromend; 27133965Sjdp int fromlen; 27233965Sjdp register int ch, ch2 = 0; 27333965Sjdp 27433965Sjdp /*State 0: beginning of normal line 27533965Sjdp 1: After first whitespace on line (flush more white) 27633965Sjdp 2: After first non-white (opcode) on line (keep 1white) 27733965Sjdp 3: after second white on line (into operands) (flush white) 27833965Sjdp 4: after putting out a .line, put out digits 27933965Sjdp 5: parsing a string, then go to old-state 28033965Sjdp 6: putting out \ escape in a "d string. 28133965Sjdp 7: After putting out a .appfile, put out string. 28233965Sjdp 8: After putting out a .appfile string, flush until newline. 28333965Sjdp 9: After seeing symbol char in state 3 (keep 1white after symchar) 28433965Sjdp 10: After seeing whitespace in state 9 (keep white before symchar) 28533965Sjdp 11: After seeing a symbol character in state 0 (eg a label definition) 28633965Sjdp -1: output string in out_string and go to the state in old_state 28733965Sjdp -2: flush text until a '*' '/' is seen, then go to state old_state 28833965Sjdp */ 28933965Sjdp 29033965Sjdp /* I added states 9 and 10 because the MIPS ECOFF assembler uses 29133965Sjdp constructs like ``.loc 1 20''. This was turning into ``.loc 29233965Sjdp 120''. States 9 and 10 ensure that a space is never dropped in 29333965Sjdp between characters which could appear in a identifier. Ian 29433965Sjdp Taylor, ian@cygnus.com. 29533965Sjdp 29633965Sjdp I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works 29733965Sjdp correctly on the PA (and any other target where colons are optional). 
29833965Sjdp Jeff Law, law@cs.utah.edu. */ 29933965Sjdp 30033965Sjdp /* This macro gets the next input character. */ 30133965Sjdp 30233965Sjdp#define GET() \ 30333965Sjdp (from < fromend \ 30433965Sjdp ? *from++ \ 30533965Sjdp : ((saved_input != NULL \ 30633965Sjdp ? (free (saved_input), \ 30733965Sjdp saved_input = NULL, \ 30833965Sjdp 0) \ 30933965Sjdp : 0), \ 31033965Sjdp fromlen = (*get) (&from), \ 31133965Sjdp fromend = from + fromlen, \ 31233965Sjdp (fromlen == 0 \ 31333965Sjdp ? EOF \ 31433965Sjdp : *from++))) 31533965Sjdp 31633965Sjdp /* This macro pushes a character back on the input stream. */ 31733965Sjdp 31833965Sjdp#define UNGET(uch) (*--from = (uch)) 31933965Sjdp 32033965Sjdp /* This macro puts a character into the output buffer. If this 32133965Sjdp character fills the output buffer, this macro jumps to the label 32233965Sjdp TOFULL. We use this rather ugly approach because we need to 32333965Sjdp handle two different termination conditions: EOF on the input 32433965Sjdp stream, and a full output buffer. It would be simpler if we 32533965Sjdp always read in the entire input stream before processing it, but 32633965Sjdp I don't want to make such a significant change to the assembler's 32733965Sjdp memory usage. 
*/ 32833965Sjdp 32933965Sjdp#define PUT(pch) \ 33033965Sjdp do \ 33133965Sjdp { \ 33233965Sjdp *to++ = (pch); \ 33333965Sjdp if (to >= toend) \ 33433965Sjdp goto tofull; \ 33533965Sjdp } \ 33633965Sjdp while (0) 33733965Sjdp 33833965Sjdp if (saved_input != NULL) 33933965Sjdp { 34033965Sjdp from = saved_input; 34133965Sjdp fromend = from + saved_input_len; 34233965Sjdp } 34333965Sjdp else 34433965Sjdp { 34533965Sjdp fromlen = (*get) (&from); 34633965Sjdp if (fromlen == 0) 34733965Sjdp return 0; 34833965Sjdp fromend = from + fromlen; 34933965Sjdp } 35033965Sjdp 35133965Sjdp while (1) 35233965Sjdp { 35333965Sjdp /* The cases in this switch end with continue, in order to 35433965Sjdp branch back to the top of this while loop and generate the 35533965Sjdp next output character in the appropriate state. */ 35633965Sjdp switch (state) 35733965Sjdp { 35833965Sjdp case -1: 35933965Sjdp ch = *out_string++; 36033965Sjdp if (*out_string == '\0') 36133965Sjdp { 36233965Sjdp state = old_state; 36333965Sjdp old_state = 3; 36433965Sjdp } 36533965Sjdp PUT (ch); 36633965Sjdp continue; 36733965Sjdp 36833965Sjdp case -2: 36933965Sjdp for (;;) 37033965Sjdp { 37133965Sjdp do 37233965Sjdp { 37333965Sjdp ch = GET (); 37433965Sjdp 37533965Sjdp if (ch == EOF) 37633965Sjdp { 37733965Sjdp as_warn ("end of file in comment"); 37833965Sjdp goto fromeof; 37933965Sjdp } 38033965Sjdp 38133965Sjdp if (ch == '\n') 38233965Sjdp PUT ('\n'); 38333965Sjdp } 38433965Sjdp while (ch != '*'); 38533965Sjdp 38633965Sjdp while ((ch = GET ()) == '*') 38733965Sjdp ; 38833965Sjdp 38933965Sjdp if (ch == EOF) 39033965Sjdp { 39133965Sjdp as_warn ("end of file in comment"); 39233965Sjdp goto fromeof; 39333965Sjdp } 39433965Sjdp 39533965Sjdp if (ch == '/') 39633965Sjdp break; 39733965Sjdp 39833965Sjdp UNGET (ch); 39933965Sjdp } 40033965Sjdp 40133965Sjdp state = old_state; 40233965Sjdp UNGET (' '); 40333965Sjdp continue; 40433965Sjdp 40533965Sjdp case 4: 40633965Sjdp ch = GET (); 40733965Sjdp if (ch == EOF) 40833965Sjdp 
goto fromeof; 40933965Sjdp else if (ch >= '0' && ch <= '9') 41033965Sjdp PUT (ch); 41133965Sjdp else 41233965Sjdp { 41333965Sjdp while (ch != EOF && IS_WHITESPACE (ch)) 41433965Sjdp ch = GET (); 41533965Sjdp if (ch == '"') 41633965Sjdp { 41733965Sjdp UNGET (ch); 41833965Sjdp if (scrub_m68k_mri) 41933965Sjdp out_string = "\n\tappfile "; 42033965Sjdp else 42133965Sjdp out_string = "\n\t.appfile "; 42233965Sjdp old_state = 7; 42333965Sjdp state = -1; 42433965Sjdp PUT (*out_string++); 42533965Sjdp } 42633965Sjdp else 42733965Sjdp { 42833965Sjdp while (ch != EOF && ch != '\n') 42933965Sjdp ch = GET (); 43033965Sjdp state = 0; 43133965Sjdp PUT (ch); 43233965Sjdp } 43333965Sjdp } 43433965Sjdp continue; 43533965Sjdp 43633965Sjdp case 5: 43733965Sjdp /* We are going to copy everything up to a quote character, 43833965Sjdp with special handling for a backslash. We try to 43933965Sjdp optimize the copying in the simple case without using the 44033965Sjdp GET and PUT macros. */ 44133965Sjdp { 44233965Sjdp char *s; 44333965Sjdp int len; 44433965Sjdp 44533965Sjdp for (s = from; s < fromend; s++) 44633965Sjdp { 44733965Sjdp ch = *s; 44833965Sjdp /* This condition must be changed if the type of any 44933965Sjdp other character can be LEX_IS_STRINGQUOTE. 
*/ 45033965Sjdp if (ch == '\\' 45133965Sjdp || ch == '"' 45233965Sjdp || ch == '\'' 45333965Sjdp || ch == '\n') 45433965Sjdp break; 45533965Sjdp } 45633965Sjdp len = s - from; 45733965Sjdp if (len > toend - to) 45833965Sjdp len = toend - to; 45933965Sjdp if (len > 0) 46033965Sjdp { 46133965Sjdp memcpy (to, from, len); 46233965Sjdp to += len; 46333965Sjdp from += len; 46433965Sjdp } 46533965Sjdp } 46633965Sjdp 46733965Sjdp ch = GET (); 46833965Sjdp if (ch == EOF) 46933965Sjdp { 47033965Sjdp as_warn ("end of file in string: inserted '\"'"); 47133965Sjdp state = old_state; 47233965Sjdp UNGET ('\n'); 47333965Sjdp PUT ('"'); 47433965Sjdp } 47533965Sjdp else if (lex[ch] == LEX_IS_STRINGQUOTE) 47633965Sjdp { 47733965Sjdp state = old_state; 47833965Sjdp PUT (ch); 47933965Sjdp } 48033965Sjdp#ifndef NO_STRING_ESCAPES 48133965Sjdp else if (ch == '\\') 48233965Sjdp { 48333965Sjdp state = 6; 48433965Sjdp PUT (ch); 48533965Sjdp } 48633965Sjdp#endif 48733965Sjdp else if (scrub_m68k_mri && ch == '\n') 48833965Sjdp { 48933965Sjdp /* Just quietly terminate the string. This permits lines like 49033965Sjdp bne label loop if we haven't reach end yet 49133965Sjdp */ 49233965Sjdp state = old_state; 49333965Sjdp UNGET (ch); 49433965Sjdp PUT ('\''); 49533965Sjdp } 49633965Sjdp else 49733965Sjdp { 49833965Sjdp PUT (ch); 49933965Sjdp } 50033965Sjdp continue; 50133965Sjdp 50233965Sjdp case 6: 50333965Sjdp state = 5; 50433965Sjdp ch = GET (); 50533965Sjdp switch (ch) 50633965Sjdp { 50733965Sjdp /* Handle strings broken across lines, by turning '\n' into 50833965Sjdp '\\' and 'n'. 
*/ 50933965Sjdp case '\n': 51033965Sjdp UNGET ('n'); 51133965Sjdp add_newlines++; 51233965Sjdp PUT ('\\'); 51333965Sjdp continue; 51433965Sjdp 51533965Sjdp case '"': 51633965Sjdp case '\\': 51733965Sjdp case 'b': 51833965Sjdp case 'f': 51933965Sjdp case 'n': 52033965Sjdp case 'r': 52133965Sjdp case 't': 52233965Sjdp case 'v': 52333965Sjdp case 'x': 52433965Sjdp case 'X': 52533965Sjdp case '0': 52633965Sjdp case '1': 52733965Sjdp case '2': 52833965Sjdp case '3': 52933965Sjdp case '4': 53033965Sjdp case '5': 53133965Sjdp case '6': 53233965Sjdp case '7': 53333965Sjdp break; 53433965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) 53533965Sjdp default: 53633965Sjdp as_warn ("Unknown escape '\\%c' in string: Ignored", ch); 53733965Sjdp break; 53833965Sjdp#else /* ONLY_STANDARD_ESCAPES */ 53933965Sjdp default: 54033965Sjdp /* Accept \x as x for any x */ 54133965Sjdp break; 54233965Sjdp#endif /* ONLY_STANDARD_ESCAPES */ 54333965Sjdp 54433965Sjdp case EOF: 54533965Sjdp as_warn ("End of file in string: '\"' inserted"); 54633965Sjdp PUT ('"'); 54733965Sjdp continue; 54833965Sjdp } 54933965Sjdp PUT (ch); 55033965Sjdp continue; 55133965Sjdp 55233965Sjdp case 7: 55333965Sjdp ch = GET (); 55433965Sjdp state = 5; 55533965Sjdp old_state = 8; 55633965Sjdp if (ch == EOF) 55733965Sjdp goto fromeof; 55833965Sjdp PUT (ch); 55933965Sjdp continue; 56033965Sjdp 56133965Sjdp case 8: 56233965Sjdp do 56333965Sjdp ch = GET (); 56433965Sjdp while (ch != '\n' && ch != EOF); 56533965Sjdp if (ch == EOF) 56633965Sjdp goto fromeof; 56733965Sjdp state = 0; 56833965Sjdp PUT (ch); 56933965Sjdp continue; 57033965Sjdp } 57133965Sjdp 57233965Sjdp /* OK, we are somewhere in states 0 through 4 or 9 through 11 */ 57333965Sjdp 57433965Sjdp /* flushchar: */ 57533965Sjdp ch = GET (); 57633965Sjdp 57733965Sjdp recycle: 57833965Sjdp 57933965Sjdp#ifdef TC_M68K 58033965Sjdp /* We want to have pseudo-ops which control whether we are in 58133965Sjdp MRI mode or not. 
Unfortunately, since m68k MRI mode affects 58233965Sjdp the scrubber, that means that we need a special purpose 58333965Sjdp recognizer here. */ 58433965Sjdp if (mri_state == NULL) 58533965Sjdp { 58633965Sjdp if ((state == 0 || state == 1) 58733965Sjdp && ch == mri_pseudo[0]) 58833965Sjdp mri_state = mri_pseudo + 1; 58933965Sjdp } 59033965Sjdp else 59133965Sjdp { 59233965Sjdp /* We advance to the next state if we find the right 59333965Sjdp character, or if we need a space character and we get any 59433965Sjdp whitespace character, or if we need a '0' and we get a 59533965Sjdp '1' (this is so that we only need one state to handle 59633965Sjdp ``.mri 0'' and ``.mri 1''). */ 59733965Sjdp if (ch != '\0' 59833965Sjdp && (*mri_state == ch 59933965Sjdp || (*mri_state == ' ' 60033965Sjdp && lex[ch] == LEX_IS_WHITESPACE) 60133965Sjdp || (*mri_state == '0' 60233965Sjdp && ch == '1'))) 60333965Sjdp { 60433965Sjdp mri_last_ch = ch; 60533965Sjdp ++mri_state; 60633965Sjdp } 60733965Sjdp else if (*mri_state != '\0' 60833965Sjdp || (lex[ch] != LEX_IS_WHITESPACE 60933965Sjdp && lex[ch] != LEX_IS_NEWLINE)) 61033965Sjdp { 61133965Sjdp /* We did not get the expected character, or we didn't 61233965Sjdp get a valid terminating character after seeing the 61333965Sjdp entire pseudo-op, so we must go back to the 61433965Sjdp beginning. */ 61533965Sjdp mri_state = NULL; 61633965Sjdp } 61733965Sjdp else 61833965Sjdp { 61933965Sjdp /* We've read the entire pseudo-op. mips_last_ch is 62033965Sjdp either '0' or '1' indicating whether to enter or 62133965Sjdp leave MRI mode. */ 62233965Sjdp do_scrub_begin (mri_last_ch == '1'); 62333965Sjdp 62433965Sjdp /* We continue handling the character as usual. The 62533965Sjdp main gas reader must also handle the .mri pseudo-op 62633965Sjdp to control expression parsing and the like. 
*/ 62733965Sjdp } 62833965Sjdp } 62933965Sjdp#endif 63033965Sjdp 63133965Sjdp if (ch == EOF) 63233965Sjdp { 63333965Sjdp if (state != 0) 63433965Sjdp { 63533965Sjdp as_warn ("end of file not at end of a line; newline inserted"); 63633965Sjdp state = 0; 63733965Sjdp PUT ('\n'); 63833965Sjdp } 63933965Sjdp goto fromeof; 64033965Sjdp } 64133965Sjdp 64233965Sjdp switch (lex[ch]) 64333965Sjdp { 64433965Sjdp case LEX_IS_WHITESPACE: 64533965Sjdp do 64633965Sjdp { 64733965Sjdp ch = GET (); 64833965Sjdp } 64933965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 65033965Sjdp if (ch == EOF) 65133965Sjdp goto fromeof; 65233965Sjdp 65333965Sjdp if (state == 0) 65433965Sjdp { 65533965Sjdp /* Preserve a single whitespace character at the 65633965Sjdp beginning of a line. */ 65733965Sjdp state = 1; 65833965Sjdp UNGET (ch); 65933965Sjdp PUT (' '); 66033965Sjdp break; 66133965Sjdp } 66233965Sjdp 66333965Sjdp if (IS_COMMENT (ch) 66433965Sjdp || ch == '/' 66533965Sjdp || IS_LINE_SEPARATOR (ch)) 66633965Sjdp { 66733965Sjdp if (scrub_m68k_mri) 66833965Sjdp { 66933965Sjdp /* In MRI mode, we keep these spaces. */ 67033965Sjdp UNGET (ch); 67133965Sjdp PUT (' '); 67233965Sjdp break; 67333965Sjdp } 67433965Sjdp goto recycle; 67533965Sjdp } 67633965Sjdp 67733965Sjdp /* If we're in state 2 or 11, we've seen a non-white 67833965Sjdp character followed by whitespace. If the next character 67933965Sjdp is ':', this is whitespace after a label name which we 68033965Sjdp normally must ignore. In MRI mode, though, spaces are 68133965Sjdp not permitted between the label and the colon. */ 68233965Sjdp if ((state == 2 || state == 11) 68333965Sjdp && lex[ch] == LEX_IS_COLON 68433965Sjdp && ! 
scrub_m68k_mri) 68533965Sjdp { 68633965Sjdp state = 1; 68733965Sjdp PUT (ch); 68833965Sjdp break; 68933965Sjdp } 69033965Sjdp 69133965Sjdp switch (state) 69233965Sjdp { 69333965Sjdp case 0: 69433965Sjdp state++; 69533965Sjdp goto recycle; /* Punted leading sp */ 69633965Sjdp case 1: 69733965Sjdp /* We can arrive here if we leave a leading whitespace 69833965Sjdp character at the beginning of a line. */ 69933965Sjdp goto recycle; 70033965Sjdp case 2: 70133965Sjdp state = 3; 70233965Sjdp if (to + 1 < toend) 70333965Sjdp { 70433965Sjdp /* Optimize common case by skipping UNGET/GET. */ 70533965Sjdp PUT (' '); /* Sp after opco */ 70633965Sjdp goto recycle; 70733965Sjdp } 70833965Sjdp UNGET (ch); 70933965Sjdp PUT (' '); 71033965Sjdp break; 71133965Sjdp case 3: 71233965Sjdp if (scrub_m68k_mri) 71333965Sjdp { 71433965Sjdp /* In MRI mode, we keep these spaces. */ 71533965Sjdp UNGET (ch); 71633965Sjdp PUT (' '); 71733965Sjdp break; 71833965Sjdp } 71933965Sjdp goto recycle; /* Sp in operands */ 72033965Sjdp case 9: 72133965Sjdp case 10: 72233965Sjdp if (scrub_m68k_mri) 72333965Sjdp { 72433965Sjdp /* In MRI mode, we keep these spaces. */ 72533965Sjdp state = 3; 72633965Sjdp UNGET (ch); 72733965Sjdp PUT (' '); 72833965Sjdp break; 72933965Sjdp } 73033965Sjdp state = 10; /* Sp after symbol char */ 73133965Sjdp goto recycle; 73233965Sjdp case 11: 73333965Sjdp if (flag_m68k_mri 73433965Sjdp#ifdef LABELS_WITHOUT_COLONS 73533965Sjdp || 1 73633965Sjdp#endif 73733965Sjdp ) 73833965Sjdp state = 1; 73933965Sjdp else 74033965Sjdp { 74133965Sjdp /* We know that ch is not ':', since we tested that 74233965Sjdp case above. Therefore this is not a label, so it 74333965Sjdp must be the opcode, and we've just seen the 74433965Sjdp whitespace after it. */ 74533965Sjdp state = 3; 74633965Sjdp } 74733965Sjdp UNGET (ch); 74833965Sjdp PUT (' '); /* Sp after label definition. 
*/ 74933965Sjdp break; 75033965Sjdp default: 75133965Sjdp BAD_CASE (state); 75233965Sjdp } 75333965Sjdp break; 75433965Sjdp 75533965Sjdp case LEX_IS_TWOCHAR_COMMENT_1ST: 75633965Sjdp ch2 = GET (); 75733965Sjdp if (ch2 == '*') 75833965Sjdp { 75933965Sjdp for (;;) 76033965Sjdp { 76133965Sjdp do 76233965Sjdp { 76333965Sjdp ch2 = GET (); 76433965Sjdp if (ch2 != EOF && IS_NEWLINE (ch2)) 76533965Sjdp add_newlines++; 76633965Sjdp } 76733965Sjdp while (ch2 != EOF && ch2 != '*'); 76833965Sjdp 76933965Sjdp while (ch2 == '*') 77033965Sjdp ch2 = GET (); 77133965Sjdp 77233965Sjdp if (ch2 == EOF || ch2 == '/') 77333965Sjdp break; 77433965Sjdp 77533965Sjdp /* This UNGET will ensure that we count newlines 77633965Sjdp correctly. */ 77733965Sjdp UNGET (ch2); 77833965Sjdp } 77933965Sjdp 78033965Sjdp if (ch2 == EOF) 78133965Sjdp as_warn ("end of file in multiline comment"); 78233965Sjdp 78333965Sjdp ch = ' '; 78433965Sjdp goto recycle; 78533965Sjdp } 78633965Sjdp else 78733965Sjdp { 78833965Sjdp if (ch2 != EOF) 78933965Sjdp UNGET (ch2); 79033965Sjdp if (state == 9 || state == 10) 79133965Sjdp state = 3; 79233965Sjdp PUT (ch); 79333965Sjdp } 79433965Sjdp break; 79533965Sjdp 79633965Sjdp case LEX_IS_STRINGQUOTE: 79733965Sjdp if (state == 10) 79833965Sjdp { 79933965Sjdp /* Preserve the whitespace in foo "bar" */ 80033965Sjdp UNGET (ch); 80133965Sjdp state = 3; 80233965Sjdp PUT (' '); 80333965Sjdp 80433965Sjdp /* PUT didn't jump out. We could just break, but we 80533965Sjdp know what will happen, so optimize a bit. 
*/ 80633965Sjdp ch = GET (); 80733965Sjdp old_state = 3; 80833965Sjdp } 80933965Sjdp else if (state == 9) 81033965Sjdp old_state = 3; 81133965Sjdp else 81233965Sjdp old_state = state; 81333965Sjdp state = 5; 81433965Sjdp PUT (ch); 81533965Sjdp break; 81633965Sjdp 81733965Sjdp#ifndef IEEE_STYLE 81833965Sjdp case LEX_IS_ONECHAR_QUOTE: 81933965Sjdp if (state == 10) 82033965Sjdp { 82133965Sjdp /* Preserve the whitespace in foo 'b' */ 82233965Sjdp UNGET (ch); 82333965Sjdp state = 3; 82433965Sjdp PUT (' '); 82533965Sjdp break; 82633965Sjdp } 82733965Sjdp ch = GET (); 82833965Sjdp if (ch == EOF) 82933965Sjdp { 83033965Sjdp as_warn ("end of file after a one-character quote; \\0 inserted"); 83133965Sjdp ch = 0; 83233965Sjdp } 83333965Sjdp if (ch == '\\') 83433965Sjdp { 83533965Sjdp ch = GET (); 83633965Sjdp if (ch == EOF) 83733965Sjdp { 83833965Sjdp as_warn ("end of file in escape character"); 83933965Sjdp ch = '\\'; 84033965Sjdp } 84133965Sjdp else 84233965Sjdp ch = process_escape (ch); 84333965Sjdp } 84433965Sjdp sprintf (out_buf, "%d", (int) (unsigned char) ch); 84533965Sjdp 84633965Sjdp /* None of these 'x constants for us. We want 'x'. 
*/ 84733965Sjdp if ((ch = GET ()) != '\'') 84833965Sjdp { 84933965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE 85033965Sjdp as_warn ("Missing close quote: (assumed)"); 85133965Sjdp#else 85233965Sjdp if (ch != EOF) 85333965Sjdp UNGET (ch); 85433965Sjdp#endif 85533965Sjdp } 85633965Sjdp if (strlen (out_buf) == 1) 85733965Sjdp { 85833965Sjdp PUT (out_buf[0]); 85933965Sjdp break; 86033965Sjdp } 86133965Sjdp if (state == 9) 86233965Sjdp old_state = 3; 86333965Sjdp else 86433965Sjdp old_state = state; 86533965Sjdp state = -1; 86633965Sjdp out_string = out_buf; 86733965Sjdp PUT (*out_string++); 86833965Sjdp break; 86933965Sjdp#endif 87033965Sjdp 87133965Sjdp case LEX_IS_COLON: 87233965Sjdp if (state == 9 || state == 10) 87333965Sjdp state = 3; 87433965Sjdp else if (state != 3) 87533965Sjdp state = 1; 87633965Sjdp PUT (ch); 87733965Sjdp break; 87833965Sjdp 87933965Sjdp case LEX_IS_NEWLINE: 88033965Sjdp /* Roll out a bunch of newlines from inside comments, etc. */ 88133965Sjdp if (add_newlines) 88233965Sjdp { 88333965Sjdp --add_newlines; 88433965Sjdp UNGET (ch); 88533965Sjdp } 88633965Sjdp /* fall thru into... */ 88733965Sjdp 88833965Sjdp case LEX_IS_LINE_SEPARATOR: 88933965Sjdp state = 0; 89033965Sjdp PUT (ch); 89133965Sjdp break; 89233965Sjdp 89333965Sjdp case LEX_IS_LINE_COMMENT_START: 89433965Sjdp /* FIXME-someday: The two character comment stuff was badly 89533965Sjdp thought out. On i386, we want '/' as line comment start 89633965Sjdp AND we want C style comments. hence this hack. The 89733965Sjdp whole lexical process should be reworked. xoxorich. */ 89833965Sjdp if (ch == '/') 89933965Sjdp { 90033965Sjdp ch2 = GET (); 90133965Sjdp if (ch2 == '*') 90233965Sjdp { 90333965Sjdp old_state = 3; 90433965Sjdp state = -2; 90533965Sjdp break; 90633965Sjdp } 90733965Sjdp else 90833965Sjdp { 90933965Sjdp UNGET (ch2); 91033965Sjdp } 91133965Sjdp } /* bad hack */ 91233965Sjdp 91333965Sjdp if (state == 0 || state == 1) /* Only comment at start of line. 
*/ 91433965Sjdp { 91533965Sjdp int startch; 91633965Sjdp 91733965Sjdp startch = ch; 91833965Sjdp 91933965Sjdp do 92033965Sjdp { 92133965Sjdp ch = GET (); 92233965Sjdp } 92333965Sjdp while (ch != EOF && IS_WHITESPACE (ch)); 92433965Sjdp if (ch == EOF) 92533965Sjdp { 92633965Sjdp as_warn ("end of file in comment; newline inserted"); 92733965Sjdp PUT ('\n'); 92833965Sjdp break; 92933965Sjdp } 93033965Sjdp if (ch < '0' || ch > '9' || state != 0 || startch != '#') 93133965Sjdp { 93233965Sjdp /* Not a cpp line. */ 93333965Sjdp while (ch != EOF && !IS_NEWLINE (ch)) 93433965Sjdp ch = GET (); 93533965Sjdp if (ch == EOF) 93633965Sjdp as_warn ("EOF in Comment: Newline inserted"); 93733965Sjdp state = 0; 93833965Sjdp PUT ('\n'); 93933965Sjdp break; 94033965Sjdp } 94133965Sjdp /* Loks like `# 123 "filename"' from cpp. */ 94233965Sjdp UNGET (ch); 94333965Sjdp old_state = 4; 94433965Sjdp state = -1; 94533965Sjdp if (scrub_m68k_mri) 94633965Sjdp out_string = "\tappline "; 94733965Sjdp else 94833965Sjdp out_string = "\t.appline "; 94933965Sjdp PUT (*out_string++); 95033965Sjdp break; 95133965Sjdp } 95233965Sjdp 95333965Sjdp /* We have a line comment character which is not at the 95433965Sjdp start of a line. If this is also a normal comment 95533965Sjdp character, fall through. Otherwise treat it as a default 95633965Sjdp character. */ 95733965Sjdp if (strchr (tc_comment_chars, ch) == NULL 95833965Sjdp && (! scrub_m68k_mri 95933965Sjdp || (ch != '!' && ch != '*'))) 96033965Sjdp goto de_fault; 96133965Sjdp if (scrub_m68k_mri 96233965Sjdp && (ch == '!' || ch == '*' || ch == '#') 96333965Sjdp && state != 1 96433965Sjdp && state != 10) 96533965Sjdp goto de_fault; 96633965Sjdp /* Fall through. 
*/ 96733965Sjdp case LEX_IS_COMMENT_START: 96833965Sjdp do 96933965Sjdp { 97033965Sjdp ch = GET (); 97133965Sjdp } 97233965Sjdp while (ch != EOF && !IS_NEWLINE (ch)); 97333965Sjdp if (ch == EOF) 97433965Sjdp as_warn ("end of file in comment; newline inserted"); 97533965Sjdp state = 0; 97633965Sjdp PUT ('\n'); 97733965Sjdp break; 97833965Sjdp 97933965Sjdp case LEX_IS_SYMBOL_COMPONENT: 98033965Sjdp if (state == 10) 98133965Sjdp { 98233965Sjdp /* This is a symbol character following another symbol 98333965Sjdp character, with whitespace in between. We skipped 98433965Sjdp the whitespace earlier, so output it now. */ 98533965Sjdp UNGET (ch); 98633965Sjdp state = 3; 98733965Sjdp PUT (' '); 98833965Sjdp break; 98933965Sjdp } 99033965Sjdp 99133965Sjdp if (state == 3) 99233965Sjdp state = 9; 99333965Sjdp 99433965Sjdp /* This is a common case. Quickly copy CH and all the 99533965Sjdp following symbol component or normal characters. */ 99633965Sjdp if (to + 1 < toend && mri_state == NULL) 99733965Sjdp { 99833965Sjdp char *s; 99933965Sjdp int len; 100033965Sjdp 100133965Sjdp for (s = from; s < fromend; s++) 100233965Sjdp { 100333965Sjdp int type; 100433965Sjdp 100533965Sjdp ch2 = *s; 100633965Sjdp type = lex[ch2]; 100733965Sjdp if (type != 0 100833965Sjdp && type != LEX_IS_SYMBOL_COMPONENT) 100933965Sjdp break; 101033965Sjdp } 101133965Sjdp if (s > from) 101233965Sjdp { 101333965Sjdp /* Handle the last character normally, for 101433965Sjdp simplicity. 
*/ 101533965Sjdp --s; 101633965Sjdp } 101733965Sjdp len = s - from; 101833965Sjdp if (len > (toend - to) - 1) 101933965Sjdp len = (toend - to) - 1; 102033965Sjdp if (len > 0) 102133965Sjdp { 102233965Sjdp PUT (ch); 102333965Sjdp if (len > 8) 102433965Sjdp { 102533965Sjdp memcpy (to, from, len); 102633965Sjdp to += len; 102733965Sjdp from += len; 102833965Sjdp } 102933965Sjdp else 103033965Sjdp { 103133965Sjdp switch (len) 103233965Sjdp { 103333965Sjdp case 8: *to++ = *from++; 103433965Sjdp case 7: *to++ = *from++; 103533965Sjdp case 6: *to++ = *from++; 103633965Sjdp case 5: *to++ = *from++; 103733965Sjdp case 4: *to++ = *from++; 103833965Sjdp case 3: *to++ = *from++; 103933965Sjdp case 2: *to++ = *from++; 104033965Sjdp case 1: *to++ = *from++; 104133965Sjdp } 104233965Sjdp } 104333965Sjdp ch = GET (); 104433965Sjdp } 104533965Sjdp } 104633965Sjdp 104733965Sjdp /* Fall through. */ 104833965Sjdp default: 104933965Sjdp de_fault: 105033965Sjdp /* Some relatively `normal' character. */ 105133965Sjdp if (state == 0) 105233965Sjdp { 105333965Sjdp state = 11; /* Now seeing label definition */ 105433965Sjdp } 105533965Sjdp else if (state == 1) 105633965Sjdp { 105733965Sjdp state = 2; /* Ditto */ 105833965Sjdp } 105933965Sjdp else if (state == 9) 106033965Sjdp { 106133965Sjdp if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) 106233965Sjdp state = 3; 106333965Sjdp } 106433965Sjdp else if (state == 10) 106533965Sjdp { 106633965Sjdp state = 3; 106733965Sjdp } 106833965Sjdp PUT (ch); 106933965Sjdp break; 107033965Sjdp } 107133965Sjdp } 107233965Sjdp 107333965Sjdp /*NOTREACHED*/ 107433965Sjdp 107533965Sjdp fromeof: 107633965Sjdp /* We have reached the end of the input. */ 107733965Sjdp return to - tostart; 107833965Sjdp 107933965Sjdp tofull: 108033965Sjdp /* The output buffer is full. Save any input we have not yet 108133965Sjdp processed. 
*/ 108233965Sjdp if (fromend > from) 108333965Sjdp { 108433965Sjdp char *save; 108533965Sjdp 108633965Sjdp save = (char *) xmalloc (fromend - from); 108733965Sjdp memcpy (save, from, fromend - from); 108833965Sjdp if (saved_input != NULL) 108933965Sjdp free (saved_input); 109033965Sjdp saved_input = save; 109133965Sjdp saved_input_len = fromend - from; 109233965Sjdp } 109333965Sjdp else 109433965Sjdp { 109533965Sjdp if (saved_input != NULL) 109633965Sjdp { 109733965Sjdp free (saved_input); 109833965Sjdp saved_input = NULL; 109933965Sjdp } 110033965Sjdp } 110133965Sjdp return to - tostart; 110233965Sjdp} 110333965Sjdp 110433965Sjdp/* end of app.c */ 1105