app.c revision 77298
133965Sjdp/* This is the Assembler Pre-Processor
260484Sobrien   Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 2000
333965Sjdp   Free Software Foundation, Inc.
433965Sjdp
533965Sjdp   This file is part of GAS, the GNU Assembler.
633965Sjdp
733965Sjdp   GAS is free software; you can redistribute it and/or modify
833965Sjdp   it under the terms of the GNU General Public License as published by
933965Sjdp   the Free Software Foundation; either version 2, or (at your option)
1033965Sjdp   any later version.
1133965Sjdp
1233965Sjdp   GAS is distributed in the hope that it will be useful,
1333965Sjdp   but WITHOUT ANY WARRANTY; without even the implied warranty of
1433965Sjdp   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1533965Sjdp   GNU General Public License for more details.
1633965Sjdp
1733965Sjdp   You should have received a copy of the GNU General Public License
1833965Sjdp   along with GAS; see the file COPYING.  If not, write to the Free
1933965Sjdp   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2033965Sjdp   02111-1307, USA.  */
2133965Sjdp
2233965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
2333965Sjdp/* App, the assembler pre-processor.  This pre-processor strips out excess
2433965Sjdp   spaces, turns single-quoted characters into a decimal constant, and turns
2533965Sjdp   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
2633965Sjdp   pair.  This needs better error-handling.  */
2733965Sjdp
2833965Sjdp#include <stdio.h>
2933965Sjdp#include "as.h"			/* For BAD_CASE() only */
3033965Sjdp
/* Compilers that do not claim ISO C conformance may not understand
   `const'; make the keyword vanish unless it is already a macro.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef TC_M68K
/* Nonzero while the scrubber is operating in m68k MRI mode.  This is
   kept separate from flag_m68k_mri because the .mri pseudo-op updates
   the two flags at different times.  */
static int scrub_m68k_mri;
#else
/* Without m68k support the scrubber is never in MRI mode.  */
#define scrub_m68k_mri 0
#endif

/* Template for the pseudo-op which switches in and out of MRI mode.
   The matcher in do_scrub_chars also accepts a `1' where this has a
   `0'.  See the comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which `@' characters need special treatment, and
   the matcher's current position within it (NULL when no match is in
   progress).  See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
static const char * symver_state;
#endif
5660484Sobrien
/* Classification of each input byte, indexed by character code.  The
   entries are filled in by do_scrub_begin; an entry that no rule sets
   stays zero.  */
static char lex[256];

/* Characters that do_scrub_begin classifies as symbol components by
   default (later rules in that function may override some of them).  */
static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* The values stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Target specific classes.  */
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif

#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif

#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif

/* Predicates on a character code C.  C is used directly as an index
   into lex[], so it must lie in the range 0..255; in particular EOF
   must be tested for before any of these is applied.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
8633965Sjdp
8733965Sjdpstatic int process_escape PARAMS ((int));
8833965Sjdp
8933965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be
9033965Sjdp   built statically at compile time rather than dynamically
9177298Sobrien   each and every time the assembler is run.  xoxorich.  */
9233965Sjdp
9377298Sobrienvoid
9433965Sjdpdo_scrub_begin (m68k_mri)
9560484Sobrien     int m68k_mri ATTRIBUTE_UNUSED;
9633965Sjdp{
9733965Sjdp  const char *p;
9860484Sobrien  int c;
9933965Sjdp
10033965Sjdp  lex[' '] = LEX_IS_WHITESPACE;
10133965Sjdp  lex['\t'] = LEX_IS_WHITESPACE;
10238889Sjdp  lex['\r'] = LEX_IS_WHITESPACE;
10333965Sjdp  lex['\n'] = LEX_IS_NEWLINE;
10433965Sjdp  lex[':'] = LEX_IS_COLON;
10533965Sjdp
10660484Sobrien#ifdef TC_M68K
10760484Sobrien  scrub_m68k_mri = m68k_mri;
10860484Sobrien
10933965Sjdp  if (! m68k_mri)
11060484Sobrien#endif
11133965Sjdp    {
11233965Sjdp      lex['"'] = LEX_IS_STRINGQUOTE;
11333965Sjdp
11460484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370)
11560484Sobrien      /* I370 uses single-quotes to delimit integer, float constants */
11633965Sjdp      lex['\''] = LEX_IS_ONECHAR_QUOTE;
11733965Sjdp#endif
11833965Sjdp
11933965Sjdp#ifdef SINGLE_QUOTE_STRINGS
12033965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
12133965Sjdp#endif
12233965Sjdp    }
12333965Sjdp
12433965Sjdp  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
12533965Sjdp     in state 5 of do_scrub_chars must be changed.  */
12633965Sjdp
12733965Sjdp  /* Note that these override the previous defaults, e.g. if ';' is a
12833965Sjdp     comment char, then it isn't a line separator.  */
12933965Sjdp  for (p = symbol_chars; *p; ++p)
13033965Sjdp    {
13133965Sjdp      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
13233965Sjdp    }				/* declare symbol characters */
13333965Sjdp
13460484Sobrien  for (c = 128; c < 256; ++c)
13560484Sobrien    lex[c] = LEX_IS_SYMBOL_COMPONENT;
13660484Sobrien
13760484Sobrien#ifdef tc_symbol_chars
13860484Sobrien  /* This macro permits the processor to specify all characters which
13960484Sobrien     may appears in an operand.  This will prevent the scrubber from
14060484Sobrien     discarding meaningful whitespace in certain cases.  The i386
14160484Sobrien     backend uses this to support prefixes, which can confuse the
14260484Sobrien     scrubber as to whether it is parsing operands or opcodes.  */
14360484Sobrien  for (p = tc_symbol_chars; *p; ++p)
14460484Sobrien    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
14560484Sobrien#endif
14660484Sobrien
14733965Sjdp  /* The m68k backend wants to be able to change comment_chars.  */
14833965Sjdp#ifndef tc_comment_chars
14933965Sjdp#define tc_comment_chars comment_chars
15033965Sjdp#endif
15133965Sjdp  for (p = tc_comment_chars; *p; p++)
15233965Sjdp    {
15333965Sjdp      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
15433965Sjdp    }				/* declare comment chars */
15533965Sjdp
15633965Sjdp  for (p = line_comment_chars; *p; p++)
15733965Sjdp    {
15833965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
15933965Sjdp    }				/* declare line comment chars */
16033965Sjdp
16133965Sjdp  for (p = line_separator_chars; *p; p++)
16233965Sjdp    {
16333965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
16433965Sjdp    }				/* declare line separators */
16533965Sjdp
16633965Sjdp  /* Only allow slash-star comments if slash is not in use.
16733965Sjdp     FIXME: This isn't right.  We should always permit them.  */
16833965Sjdp  if (lex['/'] == 0)
16933965Sjdp    {
17033965Sjdp      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
17133965Sjdp    }
17233965Sjdp
17360484Sobrien#ifdef TC_M68K
17433965Sjdp  if (m68k_mri)
17533965Sjdp    {
17633965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
17733965Sjdp      lex[';'] = LEX_IS_COMMENT_START;
17833965Sjdp      lex['*'] = LEX_IS_LINE_COMMENT_START;
17933965Sjdp      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
18033965Sjdp         then it can't be used in an expression.  */
18133965Sjdp      lex['!'] = LEX_IS_LINE_COMMENT_START;
18233965Sjdp    }
18360484Sobrien#endif
18438889Sjdp
18538889Sjdp#ifdef TC_V850
18638889Sjdp  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
18738889Sjdp#endif
18860484Sobrien#ifdef DOUBLEBAR_PARALLEL
18938889Sjdp  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
19038889Sjdp#endif
19160484Sobrien#ifdef TC_D30V
19260484Sobrien  /* must do this is we want VLIW instruction with "->" or "<-" */
19360484Sobrien  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
19460484Sobrien#endif
19533965Sjdp}				/* do_scrub_begin() */
19633965Sjdp
/* Scrubber state which must survive between calls to do_scrub_chars.  */

/* The state machine's current state, and the state to go back to
   once a temporary state (-1, -2 or 5) has finished.  */
static int state;
static int old_state;

/* The text being emitted while in state -1.  */
static char *out_string;

/* Small scratch buffer; app_push and app_pop save and restore its
   contents together with out_string.  */
static char out_buf[20];

/* Bumped each time a backslash-newline is met inside a string.  */
static int add_newlines;

/* When non-NULL, the input do_scrub_chars resumes from on entry
   instead of fetching more, and the number of bytes it holds.  */
static char *saved_input;
static int saved_input_len;

/* Where the GET callback deposits raw input.  */
static char input_buffer[32 * 1024];

/* Position within mri_pseudo while a ``.mri'' pseudo-op is being
   recognized (NULL when no match is in progress), and the last
   character that matched.  */
static const char *mri_state;
static char mri_last_ch;
20833965Sjdp
20933965Sjdp/* Data structure for saving the state of app across #include's.  Note that
21033965Sjdp   app is called asynchronously to the parsing of the .include's, so our
21133965Sjdp   state at the time .include is interpreted is completely unrelated.
21233965Sjdp   That's why we have to save it all.  */
21333965Sjdp
21477298Sobrienstruct app_save {
21577298Sobrien  int          state;
21677298Sobrien  int          old_state;
21777298Sobrien  char *       out_string;
21877298Sobrien  char         out_buf[sizeof (out_buf)];
21977298Sobrien  int          add_newlines;
22077298Sobrien  char *       saved_input;
22177298Sobrien  int          saved_input_len;
22260484Sobrien#ifdef TC_M68K
22377298Sobrien  int          scrub_m68k_mri;
22460484Sobrien#endif
22577298Sobrien  const char * mri_state;
22677298Sobrien  char         mri_last_ch;
22760484Sobrien#if defined TC_ARM && defined OBJ_ELF
22877298Sobrien  const char * symver_state;
22960484Sobrien#endif
23077298Sobrien};
23133965Sjdp
23233965Sjdpchar *
23333965Sjdpapp_push ()
23433965Sjdp{
23533965Sjdp  register struct app_save *saved;
23633965Sjdp
23733965Sjdp  saved = (struct app_save *) xmalloc (sizeof (*saved));
23833965Sjdp  saved->state = state;
23933965Sjdp  saved->old_state = old_state;
24033965Sjdp  saved->out_string = out_string;
24133965Sjdp  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
24233965Sjdp  saved->add_newlines = add_newlines;
24360484Sobrien  if (saved_input == NULL)
24460484Sobrien    saved->saved_input = NULL;
24560484Sobrien  else
24660484Sobrien    {
24760484Sobrien      saved->saved_input = xmalloc (saved_input_len);
24860484Sobrien      memcpy (saved->saved_input, saved_input, saved_input_len);
24960484Sobrien      saved->saved_input_len = saved_input_len;
25060484Sobrien    }
25160484Sobrien#ifdef TC_M68K
25233965Sjdp  saved->scrub_m68k_mri = scrub_m68k_mri;
25360484Sobrien#endif
25433965Sjdp  saved->mri_state = mri_state;
25533965Sjdp  saved->mri_last_ch = mri_last_ch;
25660484Sobrien#if defined TC_ARM && defined OBJ_ELF
25760484Sobrien  saved->symver_state = symver_state;
25860484Sobrien#endif
25933965Sjdp
26077298Sobrien  /* do_scrub_begin() is not useful, just wastes time.  */
26133965Sjdp
26233965Sjdp  state = 0;
26333965Sjdp  saved_input = NULL;
26433965Sjdp
26533965Sjdp  return (char *) saved;
26633965Sjdp}
26733965Sjdp
26877298Sobrienvoid
26933965Sjdpapp_pop (arg)
27033965Sjdp     char *arg;
27133965Sjdp{
27233965Sjdp  register struct app_save *saved = (struct app_save *) arg;
27333965Sjdp
27477298Sobrien  /* There is no do_scrub_end ().  */
27533965Sjdp  state = saved->state;
27633965Sjdp  old_state = saved->old_state;
27733965Sjdp  out_string = saved->out_string;
27833965Sjdp  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
27933965Sjdp  add_newlines = saved->add_newlines;
28060484Sobrien  if (saved->saved_input == NULL)
28160484Sobrien    saved_input = NULL;
28260484Sobrien  else
28360484Sobrien    {
28460484Sobrien      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
28560484Sobrien      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
28660484Sobrien      saved_input = input_buffer;
28760484Sobrien      saved_input_len = saved->saved_input_len;
28860484Sobrien      free (saved->saved_input);
28960484Sobrien    }
29060484Sobrien#ifdef TC_M68K
29133965Sjdp  scrub_m68k_mri = saved->scrub_m68k_mri;
29260484Sobrien#endif
29333965Sjdp  mri_state = saved->mri_state;
29433965Sjdp  mri_last_ch = saved->mri_last_ch;
29560484Sobrien#if defined TC_ARM && defined OBJ_ELF
29660484Sobrien  symver_state = saved->symver_state;
29760484Sobrien#endif
29833965Sjdp
29933965Sjdp  free (arg);
30033965Sjdp}				/* app_pop() */
30133965Sjdp
/* Return the character denoted by the escape sequence backslash-CH.
   Only b, f, n, r and t are translated; every other character,
   including the two quote characters, stands for itself.

   @@ The translation uses the host's values for \n and friends, which
   are not necessarily the same as the target's.  */
static int
process_escape (ch)
     int ch;
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';

  /* Anything else, quotes included, is taken literally.  */
  return ch;
}
32833965Sjdp
32933965Sjdp/* This function is called to process input characters.  The GET
33033965Sjdp   parameter is used to retrieve more input characters.  GET should
33133965Sjdp   set its parameter to point to a buffer, and return the length of
33233965Sjdp   the buffer; it should return 0 at end of file.  The scrubbed output
33333965Sjdp   characters are put into the buffer starting at TOSTART; the TOSTART
33433965Sjdp   buffer is TOLEN bytes in length.  The function returns the number
33533965Sjdp   of scrubbed characters put into TOSTART.  This will be TOLEN unless
33633965Sjdp   end of file was seen.  This function is arranged as a state
33733965Sjdp   machine, and saves its state so that it may return at any point.
33833965Sjdp   This is the way the old code used to work.  */
33933965Sjdp
34033965Sjdpint
34133965Sjdpdo_scrub_chars (get, tostart, tolen)
34260484Sobrien     int (*get) PARAMS ((char *, int));
34333965Sjdp     char *tostart;
34433965Sjdp     int tolen;
34533965Sjdp{
34633965Sjdp  char *to = tostart;
34733965Sjdp  char *toend = tostart + tolen;
34833965Sjdp  char *from;
34933965Sjdp  char *fromend;
35033965Sjdp  int fromlen;
35133965Sjdp  register int ch, ch2 = 0;
35233965Sjdp
35333965Sjdp  /*State 0: beginning of normal line
35433965Sjdp	  1: After first whitespace on line (flush more white)
35533965Sjdp	  2: After first non-white (opcode) on line (keep 1white)
35633965Sjdp	  3: after second white on line (into operands) (flush white)
35733965Sjdp	  4: after putting out a .line, put out digits
35833965Sjdp	  5: parsing a string, then go to old-state
35933965Sjdp	  6: putting out \ escape in a "d string.
36033965Sjdp	  7: After putting out a .appfile, put out string.
36133965Sjdp	  8: After putting out a .appfile string, flush until newline.
36233965Sjdp	  9: After seeing symbol char in state 3 (keep 1white after symchar)
36333965Sjdp	 10: After seeing whitespace in state 9 (keep white before symchar)
36433965Sjdp	 11: After seeing a symbol character in state 0 (eg a label definition)
36533965Sjdp	 -1: output string in out_string and go to the state in old_state
36633965Sjdp	 -2: flush text until a '*' '/' is seen, then go to state old_state
36738889Sjdp#ifdef TC_V850
36838889Sjdp         12: After seeing a dash, looking for a second dash as a start of comment.
36938889Sjdp#endif
37060484Sobrien#ifdef DOUBLEBAR_PARALLEL
37138889Sjdp	 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator.
37238889Sjdp#endif
37333965Sjdp	  */
37433965Sjdp
37533965Sjdp  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
37633965Sjdp     constructs like ``.loc 1 20''.  This was turning into ``.loc
37733965Sjdp     120''.  States 9 and 10 ensure that a space is never dropped in
37833965Sjdp     between characters which could appear in a identifier.  Ian
37933965Sjdp     Taylor, ian@cygnus.com.
38033965Sjdp
38133965Sjdp     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
38233965Sjdp     correctly on the PA (and any other target where colons are optional).
38338889Sjdp     Jeff Law, law@cs.utah.edu.
38433965Sjdp
38538889Sjdp     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
38638889Sjdp     get squashed into "cmp r1,r2||trap#1", with the all important space
38738889Sjdp     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
38838889Sjdp
38933965Sjdp  /* This macro gets the next input character.  */
39033965Sjdp
39160484Sobrien#define GET()							\
39260484Sobrien  (from < fromend						\
39360484Sobrien   ? * (unsigned char *) (from++)				\
39460484Sobrien   : (saved_input = NULL,					\
39560484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer),	\
39660484Sobrien      from = input_buffer,					\
39760484Sobrien      fromend = from + fromlen,					\
39860484Sobrien      (fromlen == 0						\
39960484Sobrien       ? EOF							\
40060484Sobrien       : * (unsigned char *) (from++))))
40133965Sjdp
40233965Sjdp  /* This macro pushes a character back on the input stream.  */
40333965Sjdp
40433965Sjdp#define UNGET(uch) (*--from = (uch))
40533965Sjdp
40633965Sjdp  /* This macro puts a character into the output buffer.  If this
40733965Sjdp     character fills the output buffer, this macro jumps to the label
40833965Sjdp     TOFULL.  We use this rather ugly approach because we need to
40933965Sjdp     handle two different termination conditions: EOF on the input
41033965Sjdp     stream, and a full output buffer.  It would be simpler if we
41133965Sjdp     always read in the entire input stream before processing it, but
41233965Sjdp     I don't want to make such a significant change to the assembler's
41333965Sjdp     memory usage.  */
41433965Sjdp
41533965Sjdp#define PUT(pch)			\
41633965Sjdp  do					\
41733965Sjdp    {					\
41833965Sjdp      *to++ = (pch);			\
41933965Sjdp      if (to >= toend)			\
42033965Sjdp        goto tofull;			\
42133965Sjdp    }					\
42233965Sjdp  while (0)
42333965Sjdp
42433965Sjdp  if (saved_input != NULL)
42533965Sjdp    {
42633965Sjdp      from = saved_input;
42733965Sjdp      fromend = from + saved_input_len;
42833965Sjdp    }
42933965Sjdp  else
43033965Sjdp    {
43160484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer);
43233965Sjdp      if (fromlen == 0)
43333965Sjdp	return 0;
43460484Sobrien      from = input_buffer;
43533965Sjdp      fromend = from + fromlen;
43633965Sjdp    }
43733965Sjdp
43833965Sjdp  while (1)
43933965Sjdp    {
44033965Sjdp      /* The cases in this switch end with continue, in order to
44133965Sjdp         branch back to the top of this while loop and generate the
44233965Sjdp         next output character in the appropriate state.  */
44333965Sjdp      switch (state)
44433965Sjdp	{
44533965Sjdp	case -1:
44633965Sjdp	  ch = *out_string++;
44733965Sjdp	  if (*out_string == '\0')
44833965Sjdp	    {
44933965Sjdp	      state = old_state;
45033965Sjdp	      old_state = 3;
45133965Sjdp	    }
45233965Sjdp	  PUT (ch);
45333965Sjdp	  continue;
45433965Sjdp
45533965Sjdp	case -2:
45633965Sjdp	  for (;;)
45733965Sjdp	    {
45833965Sjdp	      do
45933965Sjdp		{
46033965Sjdp		  ch = GET ();
46133965Sjdp
46233965Sjdp		  if (ch == EOF)
46333965Sjdp		    {
46460484Sobrien		      as_warn (_("end of file in comment"));
46533965Sjdp		      goto fromeof;
46633965Sjdp		    }
46733965Sjdp
46833965Sjdp		  if (ch == '\n')
46933965Sjdp		    PUT ('\n');
47033965Sjdp		}
47133965Sjdp	      while (ch != '*');
47233965Sjdp
47333965Sjdp	      while ((ch = GET ()) == '*')
47433965Sjdp		;
47533965Sjdp
47633965Sjdp	      if (ch == EOF)
47733965Sjdp		{
47860484Sobrien		  as_warn (_("end of file in comment"));
47933965Sjdp		  goto fromeof;
48033965Sjdp		}
48133965Sjdp
48233965Sjdp	      if (ch == '/')
48333965Sjdp		break;
48433965Sjdp
48533965Sjdp	      UNGET (ch);
48633965Sjdp	    }
48733965Sjdp
48833965Sjdp	  state = old_state;
48933965Sjdp	  UNGET (' ');
49033965Sjdp	  continue;
49133965Sjdp
49233965Sjdp	case 4:
49333965Sjdp	  ch = GET ();
49433965Sjdp	  if (ch == EOF)
49533965Sjdp	    goto fromeof;
49633965Sjdp	  else if (ch >= '0' && ch <= '9')
49733965Sjdp	    PUT (ch);
49833965Sjdp	  else
49933965Sjdp	    {
50033965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch))
50133965Sjdp		ch = GET ();
50233965Sjdp	      if (ch == '"')
50333965Sjdp		{
50433965Sjdp		  UNGET (ch);
50533965Sjdp		  if (scrub_m68k_mri)
50633965Sjdp		    out_string = "\n\tappfile ";
50733965Sjdp		  else
50833965Sjdp		    out_string = "\n\t.appfile ";
50933965Sjdp		  old_state = 7;
51033965Sjdp		  state = -1;
51133965Sjdp		  PUT (*out_string++);
51233965Sjdp		}
51333965Sjdp	      else
51433965Sjdp		{
51533965Sjdp		  while (ch != EOF && ch != '\n')
51633965Sjdp		    ch = GET ();
51733965Sjdp		  state = 0;
51833965Sjdp		  PUT (ch);
51933965Sjdp		}
52033965Sjdp	    }
52133965Sjdp	  continue;
52233965Sjdp
52333965Sjdp	case 5:
52433965Sjdp	  /* We are going to copy everything up to a quote character,
52533965Sjdp             with special handling for a backslash.  We try to
52633965Sjdp             optimize the copying in the simple case without using the
52733965Sjdp             GET and PUT macros.  */
52833965Sjdp	  {
52933965Sjdp	    char *s;
53033965Sjdp	    int len;
53133965Sjdp
53233965Sjdp	    for (s = from; s < fromend; s++)
53333965Sjdp	      {
53433965Sjdp		ch = *s;
53533965Sjdp		/* This condition must be changed if the type of any
53633965Sjdp                   other character can be LEX_IS_STRINGQUOTE.  */
53733965Sjdp		if (ch == '\\'
53833965Sjdp		    || ch == '"'
53933965Sjdp		    || ch == '\''
54033965Sjdp		    || ch == '\n')
54133965Sjdp		  break;
54233965Sjdp	      }
54333965Sjdp	    len = s - from;
54433965Sjdp	    if (len > toend - to)
54533965Sjdp	      len = toend - to;
54633965Sjdp	    if (len > 0)
54733965Sjdp	      {
54833965Sjdp		memcpy (to, from, len);
54933965Sjdp		to += len;
55033965Sjdp		from += len;
55133965Sjdp	      }
55233965Sjdp	  }
55333965Sjdp
55433965Sjdp	  ch = GET ();
55533965Sjdp	  if (ch == EOF)
55633965Sjdp	    {
55760484Sobrien	      as_warn (_("end of file in string: inserted '\"'"));
55833965Sjdp	      state = old_state;
55933965Sjdp	      UNGET ('\n');
56033965Sjdp	      PUT ('"');
56133965Sjdp	    }
56233965Sjdp	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
56333965Sjdp	    {
56433965Sjdp	      state = old_state;
56533965Sjdp	      PUT (ch);
56633965Sjdp	    }
56733965Sjdp#ifndef NO_STRING_ESCAPES
56833965Sjdp	  else if (ch == '\\')
56933965Sjdp	    {
57033965Sjdp	      state = 6;
57133965Sjdp	      PUT (ch);
57233965Sjdp	    }
57333965Sjdp#endif
57433965Sjdp	  else if (scrub_m68k_mri && ch == '\n')
57533965Sjdp	    {
57633965Sjdp	      /* Just quietly terminate the string.  This permits lines like
57733965Sjdp		   bne	label	loop if we haven't reach end yet
57833965Sjdp		 */
57933965Sjdp	      state = old_state;
58033965Sjdp	      UNGET (ch);
58133965Sjdp	      PUT ('\'');
58233965Sjdp	    }
58333965Sjdp	  else
58433965Sjdp	    {
58533965Sjdp	      PUT (ch);
58633965Sjdp	    }
58733965Sjdp	  continue;
58833965Sjdp
58933965Sjdp	case 6:
59033965Sjdp	  state = 5;
59133965Sjdp	  ch = GET ();
59233965Sjdp	  switch (ch)
59333965Sjdp	    {
59433965Sjdp	      /* Handle strings broken across lines, by turning '\n' into
59533965Sjdp		 '\\' and 'n'.  */
59633965Sjdp	    case '\n':
59733965Sjdp	      UNGET ('n');
59833965Sjdp	      add_newlines++;
59933965Sjdp	      PUT ('\\');
60033965Sjdp	      continue;
60133965Sjdp
60233965Sjdp	    case '"':
60333965Sjdp	    case '\\':
60433965Sjdp	    case 'b':
60533965Sjdp	    case 'f':
60633965Sjdp	    case 'n':
60733965Sjdp	    case 'r':
60833965Sjdp	    case 't':
60933965Sjdp	    case 'v':
61033965Sjdp	    case 'x':
61133965Sjdp	    case 'X':
61233965Sjdp	    case '0':
61333965Sjdp	    case '1':
61433965Sjdp	    case '2':
61533965Sjdp	    case '3':
61633965Sjdp	    case '4':
61733965Sjdp	    case '5':
61833965Sjdp	    case '6':
61933965Sjdp	    case '7':
62033965Sjdp	      break;
62133965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
62233965Sjdp	    default:
62360484Sobrien	      as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
62433965Sjdp	      break;
62533965Sjdp#else  /* ONLY_STANDARD_ESCAPES */
62633965Sjdp	    default:
62733965Sjdp	      /* Accept \x as x for any x */
62833965Sjdp	      break;
62933965Sjdp#endif /* ONLY_STANDARD_ESCAPES */
63033965Sjdp
63133965Sjdp	    case EOF:
63260484Sobrien	      as_warn (_("End of file in string: '\"' inserted"));
63333965Sjdp	      PUT ('"');
63433965Sjdp	      continue;
63533965Sjdp	    }
63633965Sjdp	  PUT (ch);
63733965Sjdp	  continue;
63833965Sjdp
63933965Sjdp	case 7:
64033965Sjdp	  ch = GET ();
64133965Sjdp	  state = 5;
64233965Sjdp	  old_state = 8;
64333965Sjdp	  if (ch == EOF)
64433965Sjdp	    goto fromeof;
64533965Sjdp	  PUT (ch);
64633965Sjdp	  continue;
64733965Sjdp
64833965Sjdp	case 8:
64933965Sjdp	  do
65033965Sjdp	    ch = GET ();
65133965Sjdp	  while (ch != '\n' && ch != EOF);
65233965Sjdp	  if (ch == EOF)
65333965Sjdp	    goto fromeof;
65433965Sjdp	  state = 0;
65533965Sjdp	  PUT (ch);
65633965Sjdp	  continue;
65733965Sjdp	}
65833965Sjdp
65933965Sjdp      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
66033965Sjdp
66133965Sjdp      /* flushchar: */
66233965Sjdp      ch = GET ();
66333965Sjdp
66433965Sjdp    recycle:
66533965Sjdp
66660484Sobrien#if defined TC_ARM && defined OBJ_ELF
66760484Sobrien      /* We need to watch out for .symver directives.  See the comment later
66860484Sobrien	 in this function.  */
66960484Sobrien      if (symver_state == NULL)
67060484Sobrien	{
67160484Sobrien	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
67260484Sobrien	    symver_state = symver_pseudo + 1;
67360484Sobrien	}
67460484Sobrien      else
67560484Sobrien	{
67660484Sobrien	  /* We advance to the next state if we find the right
67760484Sobrien	     character.  */
67860484Sobrien	  if (ch != '\0' && (*symver_state == ch))
67960484Sobrien	    ++symver_state;
68060484Sobrien	  else if (*symver_state != '\0')
68160484Sobrien	    /* We did not get the expected character, or we didn't
68260484Sobrien	       get a valid terminating character after seeing the
68360484Sobrien	       entire pseudo-op, so we must go back to the beginning.  */
68460484Sobrien	    symver_state = NULL;
68560484Sobrien	  else
68660484Sobrien	    {
68760484Sobrien	      /* We've read the entire pseudo-op.  If this is the end
68860484Sobrien		 of the line, go back to the beginning.  */
68960484Sobrien	      if (IS_NEWLINE (ch))
69060484Sobrien		symver_state = NULL;
69160484Sobrien	    }
69260484Sobrien	}
69360484Sobrien#endif /* TC_ARM && OBJ_ELF */
69460484Sobrien
69533965Sjdp#ifdef TC_M68K
69633965Sjdp      /* We want to have pseudo-ops which control whether we are in
69733965Sjdp         MRI mode or not.  Unfortunately, since m68k MRI mode affects
69833965Sjdp         the scrubber, that means that we need a special purpose
69933965Sjdp         recognizer here.  */
70033965Sjdp      if (mri_state == NULL)
70133965Sjdp	{
70233965Sjdp	  if ((state == 0 || state == 1)
70333965Sjdp	      && ch == mri_pseudo[0])
70433965Sjdp	    mri_state = mri_pseudo + 1;
70533965Sjdp	}
70633965Sjdp      else
70733965Sjdp	{
70833965Sjdp	  /* We advance to the next state if we find the right
70933965Sjdp	     character, or if we need a space character and we get any
71033965Sjdp	     whitespace character, or if we need a '0' and we get a
71133965Sjdp	     '1' (this is so that we only need one state to handle
71233965Sjdp	     ``.mri 0'' and ``.mri 1'').  */
71333965Sjdp	  if (ch != '\0'
71433965Sjdp	      && (*mri_state == ch
71533965Sjdp		  || (*mri_state == ' '
71633965Sjdp		      && lex[ch] == LEX_IS_WHITESPACE)
71733965Sjdp		  || (*mri_state == '0'
71833965Sjdp		      && ch == '1')))
71933965Sjdp	    {
72033965Sjdp	      mri_last_ch = ch;
72133965Sjdp	      ++mri_state;
72233965Sjdp	    }
72333965Sjdp	  else if (*mri_state != '\0'
72433965Sjdp		   || (lex[ch] != LEX_IS_WHITESPACE
72533965Sjdp		       && lex[ch] != LEX_IS_NEWLINE))
72633965Sjdp	    {
72733965Sjdp	      /* We did not get the expected character, or we didn't
72833965Sjdp		 get a valid terminating character after seeing the
72933965Sjdp		 entire pseudo-op, so we must go back to the
73033965Sjdp		 beginning.  */
73133965Sjdp	      mri_state = NULL;
73233965Sjdp	    }
73333965Sjdp	  else
73433965Sjdp	    {
73533965Sjdp	      /* We've read the entire pseudo-op.  mips_last_ch is
73633965Sjdp                 either '0' or '1' indicating whether to enter or
73733965Sjdp                 leave MRI mode.  */
73833965Sjdp	      do_scrub_begin (mri_last_ch == '1');
73938889Sjdp	      mri_state = NULL;
74033965Sjdp
74133965Sjdp	      /* We continue handling the character as usual.  The
74233965Sjdp                 main gas reader must also handle the .mri pseudo-op
74333965Sjdp                 to control expression parsing and the like.  */
74433965Sjdp	    }
74533965Sjdp	}
74633965Sjdp#endif
74733965Sjdp
74833965Sjdp      if (ch == EOF)
74933965Sjdp	{
75033965Sjdp	  if (state != 0)
75133965Sjdp	    {
75260484Sobrien	      as_warn (_("end of file not at end of a line; newline inserted"));
75333965Sjdp	      state = 0;
75433965Sjdp	      PUT ('\n');
75533965Sjdp	    }
75633965Sjdp	  goto fromeof;
75733965Sjdp	}
75833965Sjdp
75933965Sjdp      switch (lex[ch])
76033965Sjdp	{
76133965Sjdp	case LEX_IS_WHITESPACE:
76233965Sjdp	  do
76333965Sjdp	    {
76433965Sjdp	      ch = GET ();
76533965Sjdp	    }
76633965Sjdp	  while (ch != EOF && IS_WHITESPACE (ch));
76733965Sjdp	  if (ch == EOF)
76833965Sjdp	    goto fromeof;
76933965Sjdp
77033965Sjdp	  if (state == 0)
77133965Sjdp	    {
77233965Sjdp	      /* Preserve a single whitespace character at the
77333965Sjdp		 beginning of a line.  */
77433965Sjdp	      state = 1;
77533965Sjdp	      UNGET (ch);
77633965Sjdp	      PUT (' ');
77733965Sjdp	      break;
77833965Sjdp	    }
77933965Sjdp
78060484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
78177298Sobrien	  if (lex[ch] == LEX_IS_COLON)
78277298Sobrien	    {
78377298Sobrien	      /* Only keep this white if there's no white *after* the
78477298Sobrien                 colon.  */
78577298Sobrien	      ch2 = GET ();
78677298Sobrien	      UNGET (ch2);
78777298Sobrien	      if (!IS_WHITESPACE (ch2))
78877298Sobrien		{
78977298Sobrien		  state = 9;
79077298Sobrien		  UNGET (ch);
79177298Sobrien		  PUT (' ');
79277298Sobrien		  break;
79377298Sobrien		}
79477298Sobrien	    }
79560484Sobrien#endif
79633965Sjdp	  if (IS_COMMENT (ch)
79733965Sjdp	      || ch == '/'
79833965Sjdp	      || IS_LINE_SEPARATOR (ch))
79933965Sjdp	    {
80033965Sjdp	      if (scrub_m68k_mri)
80133965Sjdp		{
80233965Sjdp		  /* In MRI mode, we keep these spaces.  */
80333965Sjdp		  UNGET (ch);
80433965Sjdp		  PUT (' ');
80533965Sjdp		  break;
80633965Sjdp		}
80733965Sjdp	      goto recycle;
80833965Sjdp	    }
80933965Sjdp
81033965Sjdp	  /* If we're in state 2 or 11, we've seen a non-white
81133965Sjdp	     character followed by whitespace.  If the next character
81233965Sjdp	     is ':', this is whitespace after a label name which we
81333965Sjdp	     normally must ignore.  In MRI mode, though, spaces are
81433965Sjdp	     not permitted between the label and the colon.  */
81533965Sjdp	  if ((state == 2 || state == 11)
81633965Sjdp	      && lex[ch] == LEX_IS_COLON
81733965Sjdp	      && ! scrub_m68k_mri)
81833965Sjdp	    {
81933965Sjdp	      state = 1;
82033965Sjdp	      PUT (ch);
82133965Sjdp	      break;
82233965Sjdp	    }
82333965Sjdp
82433965Sjdp	  switch (state)
82533965Sjdp	    {
82633965Sjdp	    case 0:
82733965Sjdp	      state++;
82833965Sjdp	      goto recycle;	/* Punted leading sp */
82933965Sjdp	    case 1:
83033965Sjdp	      /* We can arrive here if we leave a leading whitespace
83133965Sjdp		 character at the beginning of a line.  */
83233965Sjdp	      goto recycle;
83333965Sjdp	    case 2:
83433965Sjdp	      state = 3;
83533965Sjdp	      if (to + 1 < toend)
83633965Sjdp		{
83733965Sjdp		  /* Optimize common case by skipping UNGET/GET.  */
83833965Sjdp		  PUT (' ');	/* Sp after opco */
83933965Sjdp		  goto recycle;
84033965Sjdp		}
84133965Sjdp	      UNGET (ch);
84233965Sjdp	      PUT (' ');
84333965Sjdp	      break;
84433965Sjdp	    case 3:
84533965Sjdp	      if (scrub_m68k_mri)
84633965Sjdp		{
84733965Sjdp		  /* In MRI mode, we keep these spaces.  */
84833965Sjdp		  UNGET (ch);
84933965Sjdp		  PUT (' ');
85033965Sjdp		  break;
85133965Sjdp		}
85233965Sjdp	      goto recycle;	/* Sp in operands */
85333965Sjdp	    case 9:
85433965Sjdp	    case 10:
85533965Sjdp	      if (scrub_m68k_mri)
85633965Sjdp		{
85733965Sjdp		  /* In MRI mode, we keep these spaces.  */
85833965Sjdp		  state = 3;
85933965Sjdp		  UNGET (ch);
86033965Sjdp		  PUT (' ');
86133965Sjdp		  break;
86233965Sjdp		}
86333965Sjdp	      state = 10;	/* Sp after symbol char */
86433965Sjdp	      goto recycle;
86533965Sjdp	    case 11:
86660484Sobrien	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
86733965Sjdp		state = 1;
86833965Sjdp	      else
86933965Sjdp		{
87033965Sjdp		  /* We know that ch is not ':', since we tested that
87133965Sjdp                     case above.  Therefore this is not a label, so it
87233965Sjdp                     must be the opcode, and we've just seen the
87333965Sjdp                     whitespace after it.  */
87433965Sjdp		  state = 3;
87533965Sjdp		}
87633965Sjdp	      UNGET (ch);
87733965Sjdp	      PUT (' ');	/* Sp after label definition.  */
87833965Sjdp	      break;
87933965Sjdp	    default:
88033965Sjdp	      BAD_CASE (state);
88133965Sjdp	    }
88233965Sjdp	  break;
88333965Sjdp
88433965Sjdp	case LEX_IS_TWOCHAR_COMMENT_1ST:
88533965Sjdp	  ch2 = GET ();
88633965Sjdp	  if (ch2 == '*')
88733965Sjdp	    {
88833965Sjdp	      for (;;)
88933965Sjdp		{
89033965Sjdp		  do
89133965Sjdp		    {
89233965Sjdp		      ch2 = GET ();
89333965Sjdp		      if (ch2 != EOF && IS_NEWLINE (ch2))
89433965Sjdp			add_newlines++;
89533965Sjdp		    }
89633965Sjdp		  while (ch2 != EOF && ch2 != '*');
89733965Sjdp
89833965Sjdp		  while (ch2 == '*')
89933965Sjdp		    ch2 = GET ();
90033965Sjdp
90133965Sjdp		  if (ch2 == EOF || ch2 == '/')
90233965Sjdp		    break;
90333965Sjdp
90433965Sjdp		  /* This UNGET will ensure that we count newlines
90533965Sjdp                     correctly.  */
90633965Sjdp		  UNGET (ch2);
90733965Sjdp		}
90833965Sjdp
90933965Sjdp	      if (ch2 == EOF)
91060484Sobrien		as_warn (_("end of file in multiline comment"));
91133965Sjdp
91233965Sjdp	      ch = ' ';
91333965Sjdp	      goto recycle;
91433965Sjdp	    }
91577298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS
91677298Sobrien	  else if (ch2 == '/')
91777298Sobrien	    {
91877298Sobrien	      do
91977298Sobrien		{
92077298Sobrien		  ch = GET ();
92177298Sobrien		}
92277298Sobrien	      while (ch != EOF && !IS_NEWLINE (ch));
92377298Sobrien	      if (ch == EOF)
92477298Sobrien		as_warn ("end of file in comment; newline inserted");
92577298Sobrien	      state = 0;
92677298Sobrien	      PUT ('\n');
92777298Sobrien	      break;
92877298Sobrien	    }
92977298Sobrien#endif
93033965Sjdp	  else
93133965Sjdp	    {
93233965Sjdp	      if (ch2 != EOF)
93333965Sjdp		UNGET (ch2);
93433965Sjdp	      if (state == 9 || state == 10)
93533965Sjdp		state = 3;
93633965Sjdp	      PUT (ch);
93733965Sjdp	    }
93833965Sjdp	  break;
93933965Sjdp
94033965Sjdp	case LEX_IS_STRINGQUOTE:
94133965Sjdp	  if (state == 10)
94233965Sjdp	    {
94333965Sjdp	      /* Preserve the whitespace in foo "bar" */
94433965Sjdp	      UNGET (ch);
94533965Sjdp	      state = 3;
94633965Sjdp	      PUT (' ');
94733965Sjdp
94833965Sjdp	      /* PUT didn't jump out.  We could just break, but we
94933965Sjdp                 know what will happen, so optimize a bit.  */
95033965Sjdp	      ch = GET ();
95133965Sjdp	      old_state = 3;
95233965Sjdp	    }
95333965Sjdp	  else if (state == 9)
95433965Sjdp	    old_state = 3;
95533965Sjdp	  else
95633965Sjdp	    old_state = state;
95733965Sjdp	  state = 5;
95833965Sjdp	  PUT (ch);
95933965Sjdp	  break;
96033965Sjdp
96133965Sjdp#ifndef IEEE_STYLE
96233965Sjdp	case LEX_IS_ONECHAR_QUOTE:
96333965Sjdp	  if (state == 10)
96433965Sjdp	    {
96533965Sjdp	      /* Preserve the whitespace in foo 'b' */
96633965Sjdp	      UNGET (ch);
96733965Sjdp	      state = 3;
96833965Sjdp	      PUT (' ');
96933965Sjdp	      break;
97033965Sjdp	    }
97133965Sjdp	  ch = GET ();
97233965Sjdp	  if (ch == EOF)
97333965Sjdp	    {
97460484Sobrien	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
97533965Sjdp	      ch = 0;
97633965Sjdp	    }
97733965Sjdp	  if (ch == '\\')
97833965Sjdp	    {
97933965Sjdp	      ch = GET ();
98033965Sjdp	      if (ch == EOF)
98133965Sjdp		{
98260484Sobrien		  as_warn (_("end of file in escape character"));
98333965Sjdp		  ch = '\\';
98433965Sjdp		}
98533965Sjdp	      else
98633965Sjdp		ch = process_escape (ch);
98733965Sjdp	    }
98833965Sjdp	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
98933965Sjdp
99033965Sjdp	  /* None of these 'x constants for us.  We want 'x'.  */
99133965Sjdp	  if ((ch = GET ()) != '\'')
99233965Sjdp	    {
99333965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE
99460484Sobrien	      as_warn (_("Missing close quote: (assumed)"));
99533965Sjdp#else
99633965Sjdp	      if (ch != EOF)
99733965Sjdp		UNGET (ch);
99833965Sjdp#endif
99933965Sjdp	    }
100033965Sjdp	  if (strlen (out_buf) == 1)
100133965Sjdp	    {
100233965Sjdp	      PUT (out_buf[0]);
100333965Sjdp	      break;
100433965Sjdp	    }
100533965Sjdp	  if (state == 9)
100633965Sjdp	    old_state = 3;
100733965Sjdp	  else
100833965Sjdp	    old_state = state;
100933965Sjdp	  state = -1;
101033965Sjdp	  out_string = out_buf;
101133965Sjdp	  PUT (*out_string++);
101233965Sjdp	  break;
101333965Sjdp#endif
101433965Sjdp
101533965Sjdp	case LEX_IS_COLON:
101660484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
101777298Sobrien	  state = 9;
101860484Sobrien#else
101933965Sjdp	  if (state == 9 || state == 10)
102033965Sjdp	    state = 3;
102133965Sjdp	  else if (state != 3)
102233965Sjdp	    state = 1;
102360484Sobrien#endif
102433965Sjdp	  PUT (ch);
102533965Sjdp	  break;
102633965Sjdp
102733965Sjdp	case LEX_IS_NEWLINE:
102833965Sjdp	  /* Roll out a bunch of newlines from inside comments, etc.  */
102933965Sjdp	  if (add_newlines)
103033965Sjdp	    {
103133965Sjdp	      --add_newlines;
103233965Sjdp	      UNGET (ch);
103333965Sjdp	    }
103477298Sobrien	  /* Fall through.  */
103533965Sjdp
103633965Sjdp	case LEX_IS_LINE_SEPARATOR:
103733965Sjdp	  state = 0;
103833965Sjdp	  PUT (ch);
103933965Sjdp	  break;
104033965Sjdp
104138889Sjdp#ifdef TC_V850
104238889Sjdp	case LEX_IS_DOUBLEDASH_1ST:
104377298Sobrien	  ch2 = GET ();
104438889Sjdp	  if (ch2 != '-')
104538889Sjdp	    {
104638889Sjdp	      UNGET (ch2);
104738889Sjdp	      goto de_fault;
104838889Sjdp	    }
104977298Sobrien	  /* Read and skip to end of line.  */
105038889Sjdp	  do
105138889Sjdp	    {
105238889Sjdp	      ch = GET ();
105338889Sjdp	    }
105438889Sjdp	  while (ch != EOF && ch != '\n');
105538889Sjdp	  if (ch == EOF)
105638889Sjdp	    {
105760484Sobrien	      as_warn (_("end of file in comment; newline inserted"));
105838889Sjdp	    }
105938889Sjdp	  state = 0;
106038889Sjdp	  PUT ('\n');
106138889Sjdp	  break;
106277298Sobrien#endif
106360484Sobrien#ifdef DOUBLEBAR_PARALLEL
106438889Sjdp	case LEX_IS_DOUBLEBAR_1ST:
106577298Sobrien	  ch2 = GET ();
106638889Sjdp	  if (ch2 != '|')
106738889Sjdp	    {
106838889Sjdp	      UNGET (ch2);
106938889Sjdp	      goto de_fault;
107038889Sjdp	    }
107138889Sjdp	  /* Reset back to state 1 and pretend that we are parsing a line from
107238889Sjdp	     just after the first white space.  */
107338889Sjdp	  state = 1;
107438889Sjdp	  PUT ('|');
107538889Sjdp	  PUT ('|');
107638889Sjdp	  break;
107777298Sobrien#endif
107833965Sjdp	case LEX_IS_LINE_COMMENT_START:
107933965Sjdp	  /* FIXME-someday: The two character comment stuff was badly
108033965Sjdp	     thought out.  On i386, we want '/' as line comment start
108133965Sjdp	     AND we want C style comments.  hence this hack.  The
108233965Sjdp	     whole lexical process should be reworked.  xoxorich.  */
108333965Sjdp	  if (ch == '/')
108433965Sjdp	    {
108533965Sjdp	      ch2 = GET ();
108633965Sjdp	      if (ch2 == '*')
108733965Sjdp		{
108833965Sjdp		  old_state = 3;
108933965Sjdp		  state = -2;
109033965Sjdp		  break;
109133965Sjdp		}
109233965Sjdp	      else
109333965Sjdp		{
109433965Sjdp		  UNGET (ch2);
109533965Sjdp		}
109633965Sjdp	    } /* bad hack */
109733965Sjdp
109833965Sjdp	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
109933965Sjdp	    {
110033965Sjdp	      int startch;
110133965Sjdp
110233965Sjdp	      startch = ch;
110333965Sjdp
110433965Sjdp	      do
110533965Sjdp		{
110633965Sjdp		  ch = GET ();
110733965Sjdp		}
110833965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch));
110933965Sjdp	      if (ch == EOF)
111033965Sjdp		{
111160484Sobrien		  as_warn (_("end of file in comment; newline inserted"));
111233965Sjdp		  PUT ('\n');
111333965Sjdp		  break;
111433965Sjdp		}
111533965Sjdp	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
111633965Sjdp		{
111733965Sjdp		  /* Not a cpp line.  */
111833965Sjdp		  while (ch != EOF && !IS_NEWLINE (ch))
111933965Sjdp		    ch = GET ();
112033965Sjdp		  if (ch == EOF)
112160484Sobrien		    as_warn (_("EOF in Comment: Newline inserted"));
112233965Sjdp		  state = 0;
112333965Sjdp		  PUT ('\n');
112433965Sjdp		  break;
112533965Sjdp		}
112677298Sobrien	      /* Looks like `# 123 "filename"' from cpp.  */
112733965Sjdp	      UNGET (ch);
112833965Sjdp	      old_state = 4;
112933965Sjdp	      state = -1;
113033965Sjdp	      if (scrub_m68k_mri)
113133965Sjdp		out_string = "\tappline ";
113233965Sjdp	      else
113333965Sjdp		out_string = "\t.appline ";
113433965Sjdp	      PUT (*out_string++);
113533965Sjdp	      break;
113633965Sjdp	    }
113733965Sjdp
113838889Sjdp#ifdef TC_D10V
113938889Sjdp	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
114038889Sjdp	     Trap is the only short insn that has a first operand that is
114138889Sjdp	     neither register nor label.
114238889Sjdp	     We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1 .
114377298Sobrien	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
114477298Sobrien	     already LEX_IS_LINE_COMMENT_START.  However, it is the
114577298Sobrien	     only character in line_comment_chars for d10v, hence we
114677298Sobrien	     can recognize it as such.  */
114738889Sjdp	  /* An alternative approach would be to reset the state to 1 when
114838889Sjdp	     we see '||', '<-' or '->', but that seems to be overkill.  */
114977298Sobrien	  if (state == 10)
115077298Sobrien	    PUT (' ');
115138889Sjdp#endif
115233965Sjdp	  /* We have a line comment character which is not at the
115333965Sjdp	     start of a line.  If this is also a normal comment
115433965Sjdp	     character, fall through.  Otherwise treat it as a default
115533965Sjdp	     character.  */
115633965Sjdp	  if (strchr (tc_comment_chars, ch) == NULL
115733965Sjdp	      && (! scrub_m68k_mri
115833965Sjdp		  || (ch != '!' && ch != '*')))
115933965Sjdp	    goto de_fault;
116033965Sjdp	  if (scrub_m68k_mri
116133965Sjdp	      && (ch == '!' || ch == '*' || ch == '#')
116233965Sjdp	      && state != 1
116333965Sjdp	      && state != 10)
116433965Sjdp	    goto de_fault;
116533965Sjdp	  /* Fall through.  */
116633965Sjdp	case LEX_IS_COMMENT_START:
116760484Sobrien#if defined TC_ARM && defined OBJ_ELF
116860484Sobrien	  /* On the ARM, `@' is the comment character.
116960484Sobrien	     Unfortunately this is also a special character in ELF .symver
117077298Sobrien	     directives (and .type, though we deal with those another way).
117177298Sobrien	     So we check if this line is such a directive, and treat
117277298Sobrien	     the character as default if so.  This is a hack.  */
117360484Sobrien	  if ((symver_state != NULL) && (*symver_state == 0))
117460484Sobrien	    goto de_fault;
117560484Sobrien#endif
117677298Sobrien#ifdef WARN_COMMENTS
117777298Sobrien	  if (!found_comment)
117877298Sobrien	    as_where (&found_comment_file, &found_comment);
117977298Sobrien#endif
118033965Sjdp	  do
118133965Sjdp	    {
118233965Sjdp	      ch = GET ();
118333965Sjdp	    }
118433965Sjdp	  while (ch != EOF && !IS_NEWLINE (ch));
118533965Sjdp	  if (ch == EOF)
118660484Sobrien	    as_warn (_("end of file in comment; newline inserted"));
118733965Sjdp	  state = 0;
118833965Sjdp	  PUT ('\n');
118933965Sjdp	  break;
119033965Sjdp
119133965Sjdp	case LEX_IS_SYMBOL_COMPONENT:
119233965Sjdp	  if (state == 10)
119333965Sjdp	    {
119433965Sjdp	      /* This is a symbol character following another symbol
119533965Sjdp		 character, with whitespace in between.  We skipped
119633965Sjdp		 the whitespace earlier, so output it now.  */
119733965Sjdp	      UNGET (ch);
119833965Sjdp	      state = 3;
119933965Sjdp	      PUT (' ');
120033965Sjdp	      break;
120133965Sjdp	    }
120233965Sjdp
120333965Sjdp	  if (state == 3)
120433965Sjdp	    state = 9;
120533965Sjdp
120633965Sjdp	  /* This is a common case.  Quickly copy CH and all the
120733965Sjdp             following symbol component or normal characters.  */
120860484Sobrien	  if (to + 1 < toend
120960484Sobrien	      && mri_state == NULL
121060484Sobrien#if defined TC_ARM && defined OBJ_ELF
121160484Sobrien	      && symver_state == NULL
121260484Sobrien#endif
121360484Sobrien	      )
121433965Sjdp	    {
121533965Sjdp	      char *s;
121633965Sjdp	      int len;
121733965Sjdp
121833965Sjdp	      for (s = from; s < fromend; s++)
121933965Sjdp		{
122033965Sjdp		  int type;
122133965Sjdp
122277298Sobrien		  ch2 = *(unsigned char *) s;
122333965Sjdp		  type = lex[ch2];
122433965Sjdp		  if (type != 0
122533965Sjdp		      && type != LEX_IS_SYMBOL_COMPONENT)
122633965Sjdp		    break;
122733965Sjdp		}
122833965Sjdp	      if (s > from)
122933965Sjdp		{
123033965Sjdp		  /* Handle the last character normally, for
123133965Sjdp                     simplicity.  */
123233965Sjdp		  --s;
123333965Sjdp		}
123433965Sjdp	      len = s - from;
123533965Sjdp	      if (len > (toend - to) - 1)
123633965Sjdp		len = (toend - to) - 1;
123733965Sjdp	      if (len > 0)
123833965Sjdp		{
123933965Sjdp		  PUT (ch);
124033965Sjdp		  if (len > 8)
124133965Sjdp		    {
124233965Sjdp		      memcpy (to, from, len);
124333965Sjdp		      to += len;
124433965Sjdp		      from += len;
124533965Sjdp		    }
124633965Sjdp		  else
124733965Sjdp		    {
124833965Sjdp		      switch (len)
124933965Sjdp			{
125033965Sjdp			case 8: *to++ = *from++;
125133965Sjdp			case 7: *to++ = *from++;
125233965Sjdp			case 6: *to++ = *from++;
125333965Sjdp			case 5: *to++ = *from++;
125433965Sjdp			case 4: *to++ = *from++;
125533965Sjdp			case 3: *to++ = *from++;
125633965Sjdp			case 2: *to++ = *from++;
125733965Sjdp			case 1: *to++ = *from++;
125833965Sjdp			}
125977298Sobrien		    }
126033965Sjdp		  ch = GET ();
126133965Sjdp		}
126233965Sjdp	    }
126333965Sjdp
126433965Sjdp	  /* Fall through.  */
126533965Sjdp	default:
126633965Sjdp	de_fault:
126733965Sjdp	  /* Some relatively `normal' character.  */
126833965Sjdp	  if (state == 0)
126933965Sjdp	    {
127033965Sjdp	      state = 11;	/* Now seeing label definition */
127133965Sjdp	    }
127233965Sjdp	  else if (state == 1)
127333965Sjdp	    {
127433965Sjdp	      state = 2;	/* Ditto */
127533965Sjdp	    }
127633965Sjdp	  else if (state == 9)
127733965Sjdp	    {
127833965Sjdp	      if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
127933965Sjdp		state = 3;
128033965Sjdp	    }
128133965Sjdp	  else if (state == 10)
128233965Sjdp	    {
128360484Sobrien	      if (ch == '\\')
128460484Sobrien		{
128560484Sobrien		  /* Special handling for backslash: a backslash may
128660484Sobrien		     be the beginning of a formal parameter (of a
128760484Sobrien		     macro) following another symbol character, with
128860484Sobrien		     whitespace in between.  If that is the case, we
128960484Sobrien		     output a space before the parameter.  Strictly
129060484Sobrien		     speaking, correct handling depends upon what the
129160484Sobrien		     macro parameter expands into; if the parameter
129260484Sobrien		     expands into something which does not start with
129360484Sobrien		     an operand character, then we don't want to keep
129460484Sobrien		     the space.  We don't have enough information to
129560484Sobrien		     make the right choice, so here we are making the
129660484Sobrien		     choice which is more likely to be correct.  */
129760484Sobrien		  PUT (' ');
129860484Sobrien		}
129960484Sobrien
130033965Sjdp	      state = 3;
130133965Sjdp	    }
130233965Sjdp	  PUT (ch);
130333965Sjdp	  break;
130433965Sjdp	}
130533965Sjdp    }
130633965Sjdp
130733965Sjdp  /*NOTREACHED*/
130833965Sjdp
130933965Sjdp fromeof:
131033965Sjdp  /* We have reached the end of the input.  */
131133965Sjdp  return to - tostart;
131233965Sjdp
131333965Sjdp tofull:
131433965Sjdp  /* The output buffer is full.  Save any input we have not yet
131533965Sjdp     processed.  */
131633965Sjdp  if (fromend > from)
131733965Sjdp    {
131860484Sobrien      saved_input = from;
131933965Sjdp      saved_input_len = fromend - from;
132033965Sjdp    }
132133965Sjdp  else
132260484Sobrien    saved_input = NULL;
132360484Sobrien
132433965Sjdp  return to - tostart;
132533965Sjdp}
132633965Sjdp
132733965Sjdp/* end of app.c */
1328