app.c revision 38889
133965Sjdp/* This is the Assembler Pre-Processor
233965Sjdp   Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 1997
333965Sjdp   Free Software Foundation, Inc.
433965Sjdp
533965Sjdp   This file is part of GAS, the GNU Assembler.
633965Sjdp
733965Sjdp   GAS is free software; you can redistribute it and/or modify
833965Sjdp   it under the terms of the GNU General Public License as published by
933965Sjdp   the Free Software Foundation; either version 2, or (at your option)
1033965Sjdp   any later version.
1133965Sjdp
1233965Sjdp   GAS is distributed in the hope that it will be useful,
1333965Sjdp   but WITHOUT ANY WARRANTY; without even the implied warranty of
1433965Sjdp   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1533965Sjdp   GNU General Public License for more details.
1633965Sjdp
1733965Sjdp   You should have received a copy of the GNU General Public License
1833965Sjdp   along with GAS; see the file COPYING.  If not, write to the Free
1933965Sjdp   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2033965Sjdp   02111-1307, USA.  */
2133965Sjdp
2233965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
2333965Sjdp/* App, the assembler pre-processor.  This pre-processor strips out excess
2433965Sjdp   spaces, turns single-quoted characters into a decimal constant, and turns
2533965Sjdp   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
2633965Sjdp   pair.  This needs better error-handling.  */
2733965Sjdp
2833965Sjdp#include <stdio.h>
2933965Sjdp#include "as.h"			/* For BAD_CASE() only */
3033965Sjdp
3133965Sjdp#if (__STDC__ != 1)
3233965Sjdp#ifndef const
3333965Sjdp#define const  /* empty */
3433965Sjdp#endif
3533965Sjdp#endif
3633965Sjdp
3733965Sjdp/* Whether we are scrubbing in m68k MRI mode.  This is different from
3833965Sjdp   flag_m68k_mri, because the two flags will be affected by the .mri
3933965Sjdp   pseudo-op at different times.  */
4033965Sjdpstatic int scrub_m68k_mri;
4133965Sjdp
4233965Sjdp/* The pseudo-op which switches in and out of MRI mode.  See the
4333965Sjdp   comment in do_scrub_chars.  */
/* The trailing '0' is also matched by '1' by the recognizer in
   do_scrub_chars, so this one pattern covers both ".mri 0" and
   ".mri 1".  */
4433965Sjdpstatic const char mri_pseudo[] = ".mri 0";
4533965Sjdp
/* Lexical class of each character, indexed by character value and
   holding one of the LEX_IS_* codes.  Entries are filled in by
   do_scrub_begin; an entry of zero means the character has no special
   class (do_scrub_begin tests lex['/'] == 0 for exactly that).  */
4633965Sjdpstatic char lex[256];
/* The characters that do_scrub_begin marks as
   LEX_IS_SYMBOL_COMPONENT.  */
4733965Sjdpstatic const char symbol_chars[] =
4833965Sjdp"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
4933965Sjdp
/* Character class codes stored in lex[].  Zero is not one of them: it
   is what an unclassified entry holds.  The value 7 is not assigned
   by any definition in this list.  */
5033965Sjdp#define LEX_IS_SYMBOL_COMPONENT		1
5133965Sjdp#define LEX_IS_WHITESPACE		2
5233965Sjdp#define LEX_IS_LINE_SEPARATOR		3
5333965Sjdp#define LEX_IS_COMMENT_START		4
5433965Sjdp#define LEX_IS_LINE_COMMENT_START	5
/* First character of a two-character comment opener; do_scrub_begin
   gives this class to '/' when '/' has no other class.  */
5533965Sjdp#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
5633965Sjdp#define	LEX_IS_STRINGQUOTE		8
5733965Sjdp#define	LEX_IS_COLON			9
5833965Sjdp#define	LEX_IS_NEWLINE			10
5933965Sjdp#define	LEX_IS_ONECHAR_QUOTE		11
/* Target-specific classes: '-' on the V850 (a second '-' starts a
   comment) and '|' on the M32R (a second '|' separates parallel
   instructions).  */
6038889Sjdp#ifdef TC_V850
6138889Sjdp#define LEX_IS_DOUBLEDASH_1ST		12
6238889Sjdp#endif
6338889Sjdp#ifdef TC_M32R
6438889Sjdp#define LEX_IS_DOUBLEBAR_1ST		13
6538889Sjdp#endif
/* Class predicates.  Each one indexes lex[] with C exactly as given and
   without parentheses around it.
   NOTE(review): a negative C (EOF, or a high-bit character held in a
   signed char) would index outside lex[] -- callers presumably rule
   that out first; confirm at each use.  */
6633965Sjdp#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
6733965Sjdp#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
6833965Sjdp#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
6933965Sjdp#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
7033965Sjdp#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
7133965Sjdp#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
7233965Sjdp
/* Maps the character following a backslash to the character it
   denotes; defined further down in this file.  */
7333965Sjdpstatic int process_escape PARAMS ((int));
7433965Sjdp
7533965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be
7633965Sjdp   built statically at compile time rather than dynamically
7733965Sjdp   each and every time the assembler is run.  xoxorich. */
7833965Sjdp
7933965Sjdpvoid
8033965Sjdpdo_scrub_begin (m68k_mri)
8133965Sjdp     int m68k_mri;
8233965Sjdp{
8333965Sjdp  const char *p;
8433965Sjdp
8533965Sjdp  scrub_m68k_mri = m68k_mri;
8633965Sjdp
8733965Sjdp  lex[' '] = LEX_IS_WHITESPACE;
8833965Sjdp  lex['\t'] = LEX_IS_WHITESPACE;
8938889Sjdp  lex['\r'] = LEX_IS_WHITESPACE;
9033965Sjdp  lex['\n'] = LEX_IS_NEWLINE;
9133965Sjdp  lex[';'] = LEX_IS_LINE_SEPARATOR;
9233965Sjdp  lex[':'] = LEX_IS_COLON;
9333965Sjdp
9433965Sjdp  if (! m68k_mri)
9533965Sjdp    {
9633965Sjdp      lex['"'] = LEX_IS_STRINGQUOTE;
9733965Sjdp
9833965Sjdp#ifndef TC_HPPA
9933965Sjdp      lex['\''] = LEX_IS_ONECHAR_QUOTE;
10033965Sjdp#endif
10133965Sjdp
10233965Sjdp#ifdef SINGLE_QUOTE_STRINGS
10333965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
10433965Sjdp#endif
10533965Sjdp    }
10633965Sjdp
10733965Sjdp  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
10833965Sjdp     in state 5 of do_scrub_chars must be changed.  */
10933965Sjdp
11033965Sjdp  /* Note that these override the previous defaults, e.g. if ';' is a
11133965Sjdp     comment char, then it isn't a line separator.  */
11233965Sjdp  for (p = symbol_chars; *p; ++p)
11333965Sjdp    {
11433965Sjdp      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
11533965Sjdp    }				/* declare symbol characters */
11633965Sjdp
11733965Sjdp  /* The m68k backend wants to be able to change comment_chars.  */
11833965Sjdp#ifndef tc_comment_chars
11933965Sjdp#define tc_comment_chars comment_chars
12033965Sjdp#endif
12133965Sjdp  for (p = tc_comment_chars; *p; p++)
12233965Sjdp    {
12333965Sjdp      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
12433965Sjdp    }				/* declare comment chars */
12533965Sjdp
12633965Sjdp  for (p = line_comment_chars; *p; p++)
12733965Sjdp    {
12833965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
12933965Sjdp    }				/* declare line comment chars */
13033965Sjdp
13133965Sjdp  for (p = line_separator_chars; *p; p++)
13233965Sjdp    {
13333965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
13433965Sjdp    }				/* declare line separators */
13533965Sjdp
13633965Sjdp  /* Only allow slash-star comments if slash is not in use.
13733965Sjdp     FIXME: This isn't right.  We should always permit them.  */
13833965Sjdp  if (lex['/'] == 0)
13933965Sjdp    {
14033965Sjdp      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
14133965Sjdp    }
14233965Sjdp
14333965Sjdp  if (m68k_mri)
14433965Sjdp    {
14533965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
14633965Sjdp      lex[';'] = LEX_IS_COMMENT_START;
14733965Sjdp      lex['*'] = LEX_IS_LINE_COMMENT_START;
14833965Sjdp      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
14933965Sjdp         then it can't be used in an expression.  */
15033965Sjdp      lex['!'] = LEX_IS_LINE_COMMENT_START;
15133965Sjdp    }
15238889Sjdp
15338889Sjdp#ifdef TC_V850
15438889Sjdp  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
15538889Sjdp#endif
15638889Sjdp#ifdef TC_M32R
15738889Sjdp  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
15838889Sjdp#endif
15933965Sjdp}				/* do_scrub_begin() */
16033965Sjdp
16133965Sjdp/* Saved state of the scrubber */
/* Current state of the do_scrub_chars state machine; the state
   numbers are listed in the comment at the top of that function.  */
16233965Sjdpstatic int state;
/* State to return to once a temporary state (string, comment, pending
   output) has finished.  */
16333965Sjdpstatic int old_state;
/* Next character of pending literal output, emitted in state -1.  */
16433965Sjdpstatic char *out_string;
/* Holds the decimal text that a single-quoted character constant is
   turned into; out_string may point into it.  */
16533965Sjdpstatic char out_buf[20];
/* Number of newlines that were swallowed (inside comments or broken
   strings) and must still be put out when a newline is next seen.  */
16633965Sjdpstatic int add_newlines;
/* Input carried over from the previous do_scrub_chars call and its
   length; NULL when nothing is pending.  The GET macro frees it once
   it has been consumed.  */
16733965Sjdpstatic char *saved_input;
16833965Sjdpstatic int saved_input_len;
/* Position within mri_pseudo while recognizing a ".mri" pseudo-op, or
   NULL when no match is in progress; mri_last_ch is the most recently
   matched character ('0' or '1' once the whole pseudo-op was seen).  */
16933965Sjdpstatic const char *mri_state;
17033965Sjdpstatic char mri_last_ch;
17133965Sjdp
17233965Sjdp/* Data structure for saving the state of app across #include's.  Note that
17333965Sjdp   app is called asynchronously to the parsing of the .include's, so our
17433965Sjdp   state at the time .include is interpreted is completely unrelated.
17533965Sjdp   That's why we have to save it all.  */
17633965Sjdp
/* Snapshot of the scrubber's file-scope state.  app_push fills one in
   and app_pop copies it back; each member mirrors the file-scope
   variable of the same name.  */
17733965Sjdpstruct app_save
17833965Sjdp  {
17933965Sjdp    int state;
18033965Sjdp    int old_state;
18133965Sjdp    char *out_string;
    /* Sized from the file-scope out_buf so the two are always copied
       in full.  */
18233965Sjdp    char out_buf[sizeof (out_buf)];
18333965Sjdp    int add_newlines;
18433965Sjdp    char *saved_input;
18533965Sjdp    int saved_input_len;
18633965Sjdp    int scrub_m68k_mri;
18733965Sjdp    const char *mri_state;
18833965Sjdp    char mri_last_ch;
18933965Sjdp  };
19033965Sjdp
19133965Sjdpchar *
19233965Sjdpapp_push ()
19333965Sjdp{
19433965Sjdp  register struct app_save *saved;
19533965Sjdp
19633965Sjdp  saved = (struct app_save *) xmalloc (sizeof (*saved));
19733965Sjdp  saved->state = state;
19833965Sjdp  saved->old_state = old_state;
19933965Sjdp  saved->out_string = out_string;
20033965Sjdp  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
20133965Sjdp  saved->add_newlines = add_newlines;
20233965Sjdp  saved->saved_input = saved_input;
20333965Sjdp  saved->saved_input_len = saved_input_len;
20433965Sjdp  saved->scrub_m68k_mri = scrub_m68k_mri;
20533965Sjdp  saved->mri_state = mri_state;
20633965Sjdp  saved->mri_last_ch = mri_last_ch;
20733965Sjdp
20833965Sjdp  /* do_scrub_begin() is not useful, just wastes time. */
20933965Sjdp
21033965Sjdp  state = 0;
21133965Sjdp  saved_input = NULL;
21233965Sjdp
21333965Sjdp  return (char *) saved;
21433965Sjdp}
21533965Sjdp
21633965Sjdpvoid
21733965Sjdpapp_pop (arg)
21833965Sjdp     char *arg;
21933965Sjdp{
22033965Sjdp  register struct app_save *saved = (struct app_save *) arg;
22133965Sjdp
22233965Sjdp  /* There is no do_scrub_end (). */
22333965Sjdp  state = saved->state;
22433965Sjdp  old_state = saved->old_state;
22533965Sjdp  out_string = saved->out_string;
22633965Sjdp  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
22733965Sjdp  add_newlines = saved->add_newlines;
22833965Sjdp  saved_input = saved->saved_input;
22933965Sjdp  saved_input_len = saved->saved_input_len;
23033965Sjdp  scrub_m68k_mri = saved->scrub_m68k_mri;
23133965Sjdp  mri_state = saved->mri_state;
23233965Sjdp  mri_last_ch = saved->mri_last_ch;
23333965Sjdp
23433965Sjdp  free (arg);
23533965Sjdp}				/* app_pop() */
23633965Sjdp
/* Return the character denoted by the escape letter CH, i.e. the
   character that followed a backslash.  The letters b, f, n, r and t
   yield the corresponding control characters; every other value,
   including the two quote characters, is returned unchanged.

   @@ This uses the host's values for \n &c and so assumes they are
   the same on host and target.  This is not necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  if (ch == 'n')
    return '\n';
  if (ch == 't')
    return '\t';
  if (ch == 'r')
    return '\r';
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';

  /* Anything else, quotes included, stands for itself.  */
  return ch;
}
26333965Sjdp
26433965Sjdp/* This function is called to process input characters.  The GET
26533965Sjdp   parameter is used to retrieve more input characters.  GET should
26633965Sjdp   set its parameter to point to a buffer, and return the length of
26733965Sjdp   the buffer; it should return 0 at end of file.  The scrubbed output
26833965Sjdp   characters are put into the buffer starting at TOSTART; the TOSTART
26933965Sjdp   buffer is TOLEN bytes in length.  The function returns the number
27033965Sjdp   of scrubbed characters put into TOSTART.  This will be TOLEN unless
27133965Sjdp   end of file was seen.  This function is arranged as a state
27233965Sjdp   machine, and saves its state so that it may return at any point.
27333965Sjdp   This is the way the old code used to work.  */
27433965Sjdp
27533965Sjdpint
27633965Sjdpdo_scrub_chars (get, tostart, tolen)
27733965Sjdp     int (*get) PARAMS ((char **));
27833965Sjdp     char *tostart;
27933965Sjdp     int tolen;
28033965Sjdp{
28133965Sjdp  char *to = tostart;
28233965Sjdp  char *toend = tostart + tolen;
28333965Sjdp  char *from;
28433965Sjdp  char *fromend;
28533965Sjdp  int fromlen;
28633965Sjdp  register int ch, ch2 = 0;
28733965Sjdp
28833965Sjdp  /*State 0: beginning of normal line
28933965Sjdp	  1: After first whitespace on line (flush more white)
29033965Sjdp	  2: After first non-white (opcode) on line (keep 1white)
29133965Sjdp	  3: after second white on line (into operands) (flush white)
29233965Sjdp	  4: after putting out a .line, put out digits
29333965Sjdp	  5: parsing a string, then go to old-state
29433965Sjdp	  6: putting out \ escape in a "d string.
29533965Sjdp	  7: After putting out a .appfile, put out string.
29633965Sjdp	  8: After putting out a .appfile string, flush until newline.
29733965Sjdp	  9: After seeing symbol char in state 3 (keep 1white after symchar)
29833965Sjdp	 10: After seeing whitespace in state 9 (keep white before symchar)
29933965Sjdp	 11: After seeing a symbol character in state 0 (eg a label definition)
30033965Sjdp	 -1: output string in out_string and go to the state in old_state
30133965Sjdp	 -2: flush text until a '*' '/' is seen, then go to state old_state
30238889Sjdp#ifdef TC_V850
30338889Sjdp         12: After seeing a dash, looking for a second dash as a start of comment.
30438889Sjdp#endif
30538889Sjdp#ifdef TC_M32R
30638889Sjdp	 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
30738889Sjdp#endif
30833965Sjdp	  */
30933965Sjdp
31033965Sjdp  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
31133965Sjdp     constructs like ``.loc 1 20''.  This was turning into ``.loc
31233965Sjdp     120''.  States 9 and 10 ensure that a space is never dropped in
31333965Sjdp     between characters which could appear in a identifier.  Ian
31433965Sjdp     Taylor, ian@cygnus.com.
31533965Sjdp
31633965Sjdp     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
31733965Sjdp     correctly on the PA (and any other target where colons are optional).
31838889Sjdp     Jeff Law, law@cs.utah.edu.
31933965Sjdp
32038889Sjdp     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
32138889Sjdp     get squashed into "cmp r1,r2||trap#1", with the all important space
32238889Sjdp     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
32338889Sjdp
32433965Sjdp  /* This macro gets the next input character.  */
32533965Sjdp
32633965Sjdp#define GET()				\
32733965Sjdp  (from < fromend			\
32833965Sjdp   ? *from++				\
32933965Sjdp   : ((saved_input != NULL		\
33033965Sjdp       ? (free (saved_input),		\
33133965Sjdp	  saved_input = NULL,		\
33233965Sjdp	  0)				\
33333965Sjdp       : 0),				\
33433965Sjdp      fromlen = (*get) (&from),		\
33533965Sjdp      fromend = from + fromlen,		\
33633965Sjdp      (fromlen == 0			\
33733965Sjdp       ? EOF				\
33833965Sjdp       : *from++)))
33933965Sjdp
34033965Sjdp  /* This macro pushes a character back on the input stream.  */
34133965Sjdp
34233965Sjdp#define UNGET(uch) (*--from = (uch))
34333965Sjdp
34433965Sjdp  /* This macro puts a character into the output buffer.  If this
34533965Sjdp     character fills the output buffer, this macro jumps to the label
34633965Sjdp     TOFULL.  We use this rather ugly approach because we need to
34733965Sjdp     handle two different termination conditions: EOF on the input
34833965Sjdp     stream, and a full output buffer.  It would be simpler if we
34933965Sjdp     always read in the entire input stream before processing it, but
35033965Sjdp     I don't want to make such a significant change to the assembler's
35133965Sjdp     memory usage.  */
35233965Sjdp
35333965Sjdp#define PUT(pch)			\
35433965Sjdp  do					\
35533965Sjdp    {					\
35633965Sjdp      *to++ = (pch);			\
35733965Sjdp      if (to >= toend)			\
35833965Sjdp        goto tofull;			\
35933965Sjdp    }					\
36033965Sjdp  while (0)
36133965Sjdp
36233965Sjdp  if (saved_input != NULL)
36333965Sjdp    {
36433965Sjdp      from = saved_input;
36533965Sjdp      fromend = from + saved_input_len;
36633965Sjdp    }
36733965Sjdp  else
36833965Sjdp    {
36933965Sjdp      fromlen = (*get) (&from);
37033965Sjdp      if (fromlen == 0)
37133965Sjdp	return 0;
37233965Sjdp      fromend = from + fromlen;
37333965Sjdp    }
37433965Sjdp
37533965Sjdp  while (1)
37633965Sjdp    {
37733965Sjdp      /* The cases in this switch end with continue, in order to
37833965Sjdp         branch back to the top of this while loop and generate the
37933965Sjdp         next output character in the appropriate state.  */
38033965Sjdp      switch (state)
38133965Sjdp	{
38233965Sjdp	case -1:
38333965Sjdp	  ch = *out_string++;
38433965Sjdp	  if (*out_string == '\0')
38533965Sjdp	    {
38633965Sjdp	      state = old_state;
38733965Sjdp	      old_state = 3;
38833965Sjdp	    }
38933965Sjdp	  PUT (ch);
39033965Sjdp	  continue;
39133965Sjdp
39233965Sjdp	case -2:
39333965Sjdp	  for (;;)
39433965Sjdp	    {
39533965Sjdp	      do
39633965Sjdp		{
39733965Sjdp		  ch = GET ();
39833965Sjdp
39933965Sjdp		  if (ch == EOF)
40033965Sjdp		    {
40133965Sjdp		      as_warn ("end of file in comment");
40233965Sjdp		      goto fromeof;
40333965Sjdp		    }
40433965Sjdp
40533965Sjdp		  if (ch == '\n')
40633965Sjdp		    PUT ('\n');
40733965Sjdp		}
40833965Sjdp	      while (ch != '*');
40933965Sjdp
41033965Sjdp	      while ((ch = GET ()) == '*')
41133965Sjdp		;
41233965Sjdp
41333965Sjdp	      if (ch == EOF)
41433965Sjdp		{
41533965Sjdp		  as_warn ("end of file in comment");
41633965Sjdp		  goto fromeof;
41733965Sjdp		}
41833965Sjdp
41933965Sjdp	      if (ch == '/')
42033965Sjdp		break;
42133965Sjdp
42233965Sjdp	      UNGET (ch);
42333965Sjdp	    }
42433965Sjdp
42533965Sjdp	  state = old_state;
42633965Sjdp	  UNGET (' ');
42733965Sjdp	  continue;
42833965Sjdp
42933965Sjdp	case 4:
43033965Sjdp	  ch = GET ();
43133965Sjdp	  if (ch == EOF)
43233965Sjdp	    goto fromeof;
43333965Sjdp	  else if (ch >= '0' && ch <= '9')
43433965Sjdp	    PUT (ch);
43533965Sjdp	  else
43633965Sjdp	    {
43733965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch))
43833965Sjdp		ch = GET ();
43933965Sjdp	      if (ch == '"')
44033965Sjdp		{
44133965Sjdp		  UNGET (ch);
44233965Sjdp		  if (scrub_m68k_mri)
44333965Sjdp		    out_string = "\n\tappfile ";
44433965Sjdp		  else
44533965Sjdp		    out_string = "\n\t.appfile ";
44633965Sjdp		  old_state = 7;
44733965Sjdp		  state = -1;
44833965Sjdp		  PUT (*out_string++);
44933965Sjdp		}
45033965Sjdp	      else
45133965Sjdp		{
45233965Sjdp		  while (ch != EOF && ch != '\n')
45333965Sjdp		    ch = GET ();
45433965Sjdp		  state = 0;
45533965Sjdp		  PUT (ch);
45633965Sjdp		}
45733965Sjdp	    }
45833965Sjdp	  continue;
45933965Sjdp
46033965Sjdp	case 5:
46133965Sjdp	  /* We are going to copy everything up to a quote character,
46233965Sjdp             with special handling for a backslash.  We try to
46333965Sjdp             optimize the copying in the simple case without using the
46433965Sjdp             GET and PUT macros.  */
46533965Sjdp	  {
46633965Sjdp	    char *s;
46733965Sjdp	    int len;
46833965Sjdp
46933965Sjdp	    for (s = from; s < fromend; s++)
47033965Sjdp	      {
47133965Sjdp		ch = *s;
47233965Sjdp		/* This condition must be changed if the type of any
47333965Sjdp                   other character can be LEX_IS_STRINGQUOTE.  */
47433965Sjdp		if (ch == '\\'
47533965Sjdp		    || ch == '"'
47633965Sjdp		    || ch == '\''
47733965Sjdp		    || ch == '\n')
47833965Sjdp		  break;
47933965Sjdp	      }
48033965Sjdp	    len = s - from;
48133965Sjdp	    if (len > toend - to)
48233965Sjdp	      len = toend - to;
48333965Sjdp	    if (len > 0)
48433965Sjdp	      {
48533965Sjdp		memcpy (to, from, len);
48633965Sjdp		to += len;
48733965Sjdp		from += len;
48833965Sjdp	      }
48933965Sjdp	  }
49033965Sjdp
49133965Sjdp	  ch = GET ();
49233965Sjdp	  if (ch == EOF)
49333965Sjdp	    {
49433965Sjdp	      as_warn ("end of file in string: inserted '\"'");
49533965Sjdp	      state = old_state;
49633965Sjdp	      UNGET ('\n');
49733965Sjdp	      PUT ('"');
49833965Sjdp	    }
49933965Sjdp	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
50033965Sjdp	    {
50133965Sjdp	      state = old_state;
50233965Sjdp	      PUT (ch);
50333965Sjdp	    }
50433965Sjdp#ifndef NO_STRING_ESCAPES
50533965Sjdp	  else if (ch == '\\')
50633965Sjdp	    {
50733965Sjdp	      state = 6;
50833965Sjdp	      PUT (ch);
50933965Sjdp	    }
51033965Sjdp#endif
51133965Sjdp	  else if (scrub_m68k_mri && ch == '\n')
51233965Sjdp	    {
51333965Sjdp	      /* Just quietly terminate the string.  This permits lines like
51433965Sjdp		   bne	label	loop if we haven't reach end yet
51533965Sjdp		 */
51633965Sjdp	      state = old_state;
51733965Sjdp	      UNGET (ch);
51833965Sjdp	      PUT ('\'');
51933965Sjdp	    }
52033965Sjdp	  else
52133965Sjdp	    {
52233965Sjdp	      PUT (ch);
52333965Sjdp	    }
52433965Sjdp	  continue;
52533965Sjdp
52633965Sjdp	case 6:
52733965Sjdp	  state = 5;
52833965Sjdp	  ch = GET ();
52933965Sjdp	  switch (ch)
53033965Sjdp	    {
53133965Sjdp	      /* Handle strings broken across lines, by turning '\n' into
53233965Sjdp		 '\\' and 'n'.  */
53333965Sjdp	    case '\n':
53433965Sjdp	      UNGET ('n');
53533965Sjdp	      add_newlines++;
53633965Sjdp	      PUT ('\\');
53733965Sjdp	      continue;
53833965Sjdp
53933965Sjdp	    case '"':
54033965Sjdp	    case '\\':
54133965Sjdp	    case 'b':
54233965Sjdp	    case 'f':
54333965Sjdp	    case 'n':
54433965Sjdp	    case 'r':
54533965Sjdp	    case 't':
54633965Sjdp	    case 'v':
54733965Sjdp	    case 'x':
54833965Sjdp	    case 'X':
54933965Sjdp	    case '0':
55033965Sjdp	    case '1':
55133965Sjdp	    case '2':
55233965Sjdp	    case '3':
55333965Sjdp	    case '4':
55433965Sjdp	    case '5':
55533965Sjdp	    case '6':
55633965Sjdp	    case '7':
55733965Sjdp	      break;
55833965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
55933965Sjdp	    default:
56033965Sjdp	      as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
56133965Sjdp	      break;
56233965Sjdp#else  /* ONLY_STANDARD_ESCAPES */
56333965Sjdp	    default:
56433965Sjdp	      /* Accept \x as x for any x */
56533965Sjdp	      break;
56633965Sjdp#endif /* ONLY_STANDARD_ESCAPES */
56733965Sjdp
56833965Sjdp	    case EOF:
56933965Sjdp	      as_warn ("End of file in string: '\"' inserted");
57033965Sjdp	      PUT ('"');
57133965Sjdp	      continue;
57233965Sjdp	    }
57333965Sjdp	  PUT (ch);
57433965Sjdp	  continue;
57533965Sjdp
57633965Sjdp	case 7:
57733965Sjdp	  ch = GET ();
57833965Sjdp	  state = 5;
57933965Sjdp	  old_state = 8;
58033965Sjdp	  if (ch == EOF)
58133965Sjdp	    goto fromeof;
58233965Sjdp	  PUT (ch);
58333965Sjdp	  continue;
58433965Sjdp
58533965Sjdp	case 8:
58633965Sjdp	  do
58733965Sjdp	    ch = GET ();
58833965Sjdp	  while (ch != '\n' && ch != EOF);
58933965Sjdp	  if (ch == EOF)
59033965Sjdp	    goto fromeof;
59133965Sjdp	  state = 0;
59233965Sjdp	  PUT (ch);
59333965Sjdp	  continue;
59433965Sjdp	}
59533965Sjdp
59633965Sjdp      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
59733965Sjdp
59833965Sjdp      /* flushchar: */
59933965Sjdp      ch = GET ();
60033965Sjdp
60133965Sjdp    recycle:
60233965Sjdp
60333965Sjdp#ifdef TC_M68K
60433965Sjdp      /* We want to have pseudo-ops which control whether we are in
60533965Sjdp         MRI mode or not.  Unfortunately, since m68k MRI mode affects
60633965Sjdp         the scrubber, that means that we need a special purpose
60733965Sjdp         recognizer here.  */
60833965Sjdp      if (mri_state == NULL)
60933965Sjdp	{
61033965Sjdp	  if ((state == 0 || state == 1)
61133965Sjdp	      && ch == mri_pseudo[0])
61233965Sjdp	    mri_state = mri_pseudo + 1;
61333965Sjdp	}
61433965Sjdp      else
61533965Sjdp	{
61633965Sjdp	  /* We advance to the next state if we find the right
61733965Sjdp	     character, or if we need a space character and we get any
61833965Sjdp	     whitespace character, or if we need a '0' and we get a
61933965Sjdp	     '1' (this is so that we only need one state to handle
62033965Sjdp	     ``.mri 0'' and ``.mri 1'').  */
62133965Sjdp	  if (ch != '\0'
62233965Sjdp	      && (*mri_state == ch
62333965Sjdp		  || (*mri_state == ' '
62433965Sjdp		      && lex[ch] == LEX_IS_WHITESPACE)
62533965Sjdp		  || (*mri_state == '0'
62633965Sjdp		      && ch == '1')))
62733965Sjdp	    {
62833965Sjdp	      mri_last_ch = ch;
62933965Sjdp	      ++mri_state;
63033965Sjdp	    }
63133965Sjdp	  else if (*mri_state != '\0'
63233965Sjdp		   || (lex[ch] != LEX_IS_WHITESPACE
63333965Sjdp		       && lex[ch] != LEX_IS_NEWLINE))
63433965Sjdp	    {
63533965Sjdp	      /* We did not get the expected character, or we didn't
63633965Sjdp		 get a valid terminating character after seeing the
63733965Sjdp		 entire pseudo-op, so we must go back to the
63833965Sjdp		 beginning.  */
63933965Sjdp	      mri_state = NULL;
64033965Sjdp	    }
64133965Sjdp	  else
64233965Sjdp	    {
64333965Sjdp	      /* We've read the entire pseudo-op.  mri_last_ch is
64433965Sjdp                 either '0' or '1' indicating whether to enter or
64533965Sjdp                 leave MRI mode.  */
64633965Sjdp	      do_scrub_begin (mri_last_ch == '1');
64738889Sjdp	      mri_state = NULL;
64833965Sjdp
64933965Sjdp	      /* We continue handling the character as usual.  The
65033965Sjdp                 main gas reader must also handle the .mri pseudo-op
65133965Sjdp                 to control expression parsing and the like.  */
65233965Sjdp	    }
65333965Sjdp	}
65433965Sjdp#endif
65533965Sjdp
65633965Sjdp      if (ch == EOF)
65733965Sjdp	{
65833965Sjdp	  if (state != 0)
65933965Sjdp	    {
66033965Sjdp	      as_warn ("end of file not at end of a line; newline inserted");
66133965Sjdp	      state = 0;
66233965Sjdp	      PUT ('\n');
66333965Sjdp	    }
66433965Sjdp	  goto fromeof;
66533965Sjdp	}
66633965Sjdp
66733965Sjdp      switch (lex[ch])
66833965Sjdp	{
66933965Sjdp	case LEX_IS_WHITESPACE:
67033965Sjdp	  do
67133965Sjdp	    {
67233965Sjdp	      ch = GET ();
67333965Sjdp	    }
67433965Sjdp	  while (ch != EOF && IS_WHITESPACE (ch));
67533965Sjdp	  if (ch == EOF)
67633965Sjdp	    goto fromeof;
67733965Sjdp
67833965Sjdp	  if (state == 0)
67933965Sjdp	    {
68033965Sjdp	      /* Preserve a single whitespace character at the
68133965Sjdp		 beginning of a line.  */
68233965Sjdp	      state = 1;
68333965Sjdp	      UNGET (ch);
68433965Sjdp	      PUT (' ');
68533965Sjdp	      break;
68633965Sjdp	    }
68733965Sjdp
68833965Sjdp	  if (IS_COMMENT (ch)
68933965Sjdp	      || ch == '/'
69033965Sjdp	      || IS_LINE_SEPARATOR (ch))
69133965Sjdp	    {
69233965Sjdp	      if (scrub_m68k_mri)
69333965Sjdp		{
69433965Sjdp		  /* In MRI mode, we keep these spaces.  */
69533965Sjdp		  UNGET (ch);
69633965Sjdp		  PUT (' ');
69733965Sjdp		  break;
69833965Sjdp		}
69933965Sjdp	      goto recycle;
70033965Sjdp	    }
70133965Sjdp
70233965Sjdp	  /* If we're in state 2 or 11, we've seen a non-white
70333965Sjdp	     character followed by whitespace.  If the next character
70433965Sjdp	     is ':', this is whitespace after a label name which we
70533965Sjdp	     normally must ignore.  In MRI mode, though, spaces are
70633965Sjdp	     not permitted between the label and the colon.  */
70733965Sjdp	  if ((state == 2 || state == 11)
70833965Sjdp	      && lex[ch] == LEX_IS_COLON
70933965Sjdp	      && ! scrub_m68k_mri)
71033965Sjdp	    {
71133965Sjdp	      state = 1;
71233965Sjdp	      PUT (ch);
71333965Sjdp	      break;
71433965Sjdp	    }
71533965Sjdp
71633965Sjdp	  switch (state)
71733965Sjdp	    {
71833965Sjdp	    case 0:
71933965Sjdp	      state++;
72033965Sjdp	      goto recycle;	/* Punted leading sp */
72133965Sjdp	    case 1:
72233965Sjdp	      /* We can arrive here if we leave a leading whitespace
72333965Sjdp		 character at the beginning of a line.  */
72433965Sjdp	      goto recycle;
72533965Sjdp	    case 2:
72633965Sjdp	      state = 3;
72733965Sjdp	      if (to + 1 < toend)
72833965Sjdp		{
72933965Sjdp		  /* Optimize common case by skipping UNGET/GET.  */
73033965Sjdp		  PUT (' ');	/* Sp after opco */
73133965Sjdp		  goto recycle;
73233965Sjdp		}
73333965Sjdp	      UNGET (ch);
73433965Sjdp	      PUT (' ');
73533965Sjdp	      break;
73633965Sjdp	    case 3:
73733965Sjdp	      if (scrub_m68k_mri)
73833965Sjdp		{
73933965Sjdp		  /* In MRI mode, we keep these spaces.  */
74033965Sjdp		  UNGET (ch);
74133965Sjdp		  PUT (' ');
74233965Sjdp		  break;
74333965Sjdp		}
74433965Sjdp	      goto recycle;	/* Sp in operands */
74533965Sjdp	    case 9:
74633965Sjdp	    case 10:
74733965Sjdp	      if (scrub_m68k_mri)
74833965Sjdp		{
74933965Sjdp		  /* In MRI mode, we keep these spaces.  */
75033965Sjdp		  state = 3;
75133965Sjdp		  UNGET (ch);
75233965Sjdp		  PUT (' ');
75333965Sjdp		  break;
75433965Sjdp		}
75533965Sjdp	      state = 10;	/* Sp after symbol char */
75633965Sjdp	      goto recycle;
75733965Sjdp	    case 11:
75833965Sjdp	      if (flag_m68k_mri
75933965Sjdp#ifdef LABELS_WITHOUT_COLONS
76033965Sjdp		  || 1
76133965Sjdp#endif
76233965Sjdp		  )
76333965Sjdp		state = 1;
76433965Sjdp	      else
76533965Sjdp		{
76633965Sjdp		  /* We know that ch is not ':', since we tested that
76733965Sjdp                     case above.  Therefore this is not a label, so it
76833965Sjdp                     must be the opcode, and we've just seen the
76933965Sjdp                     whitespace after it.  */
77033965Sjdp		  state = 3;
77133965Sjdp		}
77233965Sjdp	      UNGET (ch);
77333965Sjdp	      PUT (' ');	/* Sp after label definition.  */
77433965Sjdp	      break;
77533965Sjdp	    default:
77633965Sjdp	      BAD_CASE (state);
77733965Sjdp	    }
77833965Sjdp	  break;
77933965Sjdp
78033965Sjdp	case LEX_IS_TWOCHAR_COMMENT_1ST:
78133965Sjdp	  ch2 = GET ();
78233965Sjdp	  if (ch2 == '*')
78333965Sjdp	    {
78433965Sjdp	      for (;;)
78533965Sjdp		{
78633965Sjdp		  do
78733965Sjdp		    {
78833965Sjdp		      ch2 = GET ();
78933965Sjdp		      if (ch2 != EOF && IS_NEWLINE (ch2))
79033965Sjdp			add_newlines++;
79133965Sjdp		    }
79233965Sjdp		  while (ch2 != EOF && ch2 != '*');
79333965Sjdp
79433965Sjdp		  while (ch2 == '*')
79533965Sjdp		    ch2 = GET ();
79633965Sjdp
79733965Sjdp		  if (ch2 == EOF || ch2 == '/')
79833965Sjdp		    break;
79933965Sjdp
80033965Sjdp		  /* This UNGET will ensure that we count newlines
80133965Sjdp                     correctly.  */
80233965Sjdp		  UNGET (ch2);
80333965Sjdp		}
80433965Sjdp
80533965Sjdp	      if (ch2 == EOF)
80633965Sjdp		as_warn ("end of file in multiline comment");
80733965Sjdp
80833965Sjdp	      ch = ' ';
80933965Sjdp	      goto recycle;
81033965Sjdp	    }
81133965Sjdp	  else
81233965Sjdp	    {
81333965Sjdp	      if (ch2 != EOF)
81433965Sjdp		UNGET (ch2);
81533965Sjdp	      if (state == 9 || state == 10)
81633965Sjdp		state = 3;
81733965Sjdp	      PUT (ch);
81833965Sjdp	    }
81933965Sjdp	  break;
82033965Sjdp
82133965Sjdp	case LEX_IS_STRINGQUOTE:
82233965Sjdp	  if (state == 10)
82333965Sjdp	    {
82433965Sjdp	      /* Preserve the whitespace in foo "bar" */
82533965Sjdp	      UNGET (ch);
82633965Sjdp	      state = 3;
82733965Sjdp	      PUT (' ');
82833965Sjdp
82933965Sjdp	      /* PUT didn't jump out.  We could just break, but we
83033965Sjdp                 know what will happen, so optimize a bit.  */
83133965Sjdp	      ch = GET ();
83233965Sjdp	      old_state = 3;
83333965Sjdp	    }
83433965Sjdp	  else if (state == 9)
83533965Sjdp	    old_state = 3;
83633965Sjdp	  else
83733965Sjdp	    old_state = state;
83833965Sjdp	  state = 5;
83933965Sjdp	  PUT (ch);
84033965Sjdp	  break;
84133965Sjdp
84233965Sjdp#ifndef IEEE_STYLE
84333965Sjdp	case LEX_IS_ONECHAR_QUOTE:
84433965Sjdp	  if (state == 10)
84533965Sjdp	    {
84633965Sjdp	      /* Preserve the whitespace in foo 'b' */
84733965Sjdp	      UNGET (ch);
84833965Sjdp	      state = 3;
84933965Sjdp	      PUT (' ');
85033965Sjdp	      break;
85133965Sjdp	    }
85233965Sjdp	  ch = GET ();
85333965Sjdp	  if (ch == EOF)
85433965Sjdp	    {
85533965Sjdp	      as_warn ("end of file after a one-character quote; \\0 inserted");
85633965Sjdp	      ch = 0;
85733965Sjdp	    }
85833965Sjdp	  if (ch == '\\')
85933965Sjdp	    {
86033965Sjdp	      ch = GET ();
86133965Sjdp	      if (ch == EOF)
86233965Sjdp		{
86333965Sjdp		  as_warn ("end of file in escape character");
86433965Sjdp		  ch = '\\';
86533965Sjdp		}
86633965Sjdp	      else
86733965Sjdp		ch = process_escape (ch);
86833965Sjdp	    }
86933965Sjdp	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
87033965Sjdp
87133965Sjdp	  /* None of these 'x constants for us.  We want 'x'.  */
87233965Sjdp	  if ((ch = GET ()) != '\'')
87333965Sjdp	    {
87433965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE
87533965Sjdp	      as_warn ("Missing close quote: (assumed)");
87633965Sjdp#else
87733965Sjdp	      if (ch != EOF)
87833965Sjdp		UNGET (ch);
87933965Sjdp#endif
88033965Sjdp	    }
88133965Sjdp	  if (strlen (out_buf) == 1)
88233965Sjdp	    {
88333965Sjdp	      PUT (out_buf[0]);
88433965Sjdp	      break;
88533965Sjdp	    }
88633965Sjdp	  if (state == 9)
88733965Sjdp	    old_state = 3;
88833965Sjdp	  else
88933965Sjdp	    old_state = state;
89033965Sjdp	  state = -1;
89133965Sjdp	  out_string = out_buf;
89233965Sjdp	  PUT (*out_string++);
89333965Sjdp	  break;
89433965Sjdp#endif
89533965Sjdp
89633965Sjdp	case LEX_IS_COLON:
89733965Sjdp	  if (state == 9 || state == 10)
89833965Sjdp	    state = 3;
89933965Sjdp	  else if (state != 3)
90033965Sjdp	    state = 1;
90133965Sjdp	  PUT (ch);
90233965Sjdp	  break;
90333965Sjdp
90433965Sjdp	case LEX_IS_NEWLINE:
90533965Sjdp	  /* Roll out a bunch of newlines from inside comments, etc.  */
90633965Sjdp	  if (add_newlines)
90733965Sjdp	    {
90833965Sjdp	      --add_newlines;
90933965Sjdp	      UNGET (ch);
91033965Sjdp	    }
91133965Sjdp	  /* fall thru into... */
91233965Sjdp
91333965Sjdp	case LEX_IS_LINE_SEPARATOR:
91433965Sjdp	  state = 0;
91533965Sjdp	  PUT (ch);
91633965Sjdp	  break;
91733965Sjdp
91838889Sjdp#ifdef TC_V850
91938889Sjdp	case LEX_IS_DOUBLEDASH_1ST:
92038889Sjdp	  ch2 = GET();
92138889Sjdp	  if (ch2 != '-')
92238889Sjdp	    {
92338889Sjdp	      UNGET (ch2);
92438889Sjdp	      goto de_fault;
92538889Sjdp	    }
92638889Sjdp	  /* read and skip to end of line */
92738889Sjdp	  do
92838889Sjdp	    {
92938889Sjdp	      ch = GET ();
93038889Sjdp	    }
93138889Sjdp	  while (ch != EOF && ch != '\n');
93238889Sjdp	  if (ch == EOF)
93338889Sjdp	    {
93438889Sjdp	      as_warn ("end of file in comment; newline inserted");
93538889Sjdp	    }
93638889Sjdp	  state = 0;
93738889Sjdp	  PUT ('\n');
93838889Sjdp	  break;
93938889Sjdp#endif
94038889Sjdp#ifdef TC_M32R
94138889Sjdp	case LEX_IS_DOUBLEBAR_1ST:
94238889Sjdp	  ch2 = GET();
94338889Sjdp	  if (ch2 != '|')
94438889Sjdp	    {
94538889Sjdp	      UNGET (ch2);
94638889Sjdp	      goto de_fault;
94738889Sjdp	    }
94838889Sjdp	  /* Reset back to state 1 and pretend that we are parsing a line from
94938889Sjdp	     just after the first white space.  */
95038889Sjdp	  state = 1;
95138889Sjdp	  PUT ('|');
95238889Sjdp	  PUT ('|');
95338889Sjdp	  break;
95438889Sjdp#endif
95533965Sjdp	case LEX_IS_LINE_COMMENT_START:
95633965Sjdp	  /* FIXME-someday: The two character comment stuff was badly
95733965Sjdp	     thought out.  On i386, we want '/' as line comment start
95833965Sjdp	     AND we want C style comments.  hence this hack.  The
95933965Sjdp	     whole lexical process should be reworked.  xoxorich.  */
96033965Sjdp	  if (ch == '/')
96133965Sjdp	    {
96233965Sjdp	      ch2 = GET ();
96333965Sjdp	      if (ch2 == '*')
96433965Sjdp		{
96533965Sjdp		  old_state = 3;
96633965Sjdp		  state = -2;
96733965Sjdp		  break;
96833965Sjdp		}
96933965Sjdp	      else
97033965Sjdp		{
97133965Sjdp		  UNGET (ch2);
97233965Sjdp		}
97333965Sjdp	    } /* bad hack */
97433965Sjdp
97533965Sjdp	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
97633965Sjdp	    {
97733965Sjdp	      int startch;
97833965Sjdp
97933965Sjdp	      startch = ch;
98033965Sjdp
98133965Sjdp	      do
98233965Sjdp		{
98333965Sjdp		  ch = GET ();
98433965Sjdp		}
98533965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch));
98633965Sjdp	      if (ch == EOF)
98733965Sjdp		{
98833965Sjdp		  as_warn ("end of file in comment; newline inserted");
98933965Sjdp		  PUT ('\n');
99033965Sjdp		  break;
99133965Sjdp		}
99233965Sjdp	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
99333965Sjdp		{
99433965Sjdp		  /* Not a cpp line.  */
99533965Sjdp		  while (ch != EOF && !IS_NEWLINE (ch))
99633965Sjdp		    ch = GET ();
99733965Sjdp		  if (ch == EOF)
99833965Sjdp		    as_warn ("EOF in Comment: Newline inserted");
99933965Sjdp		  state = 0;
100033965Sjdp		  PUT ('\n');
100133965Sjdp		  break;
100233965Sjdp		}
100333965Sjdp	      /* Looks like `# 123 "filename"' from cpp.  */
100433965Sjdp	      UNGET (ch);
100533965Sjdp	      old_state = 4;
100633965Sjdp	      state = -1;
100733965Sjdp	      if (scrub_m68k_mri)
100833965Sjdp		out_string = "\tappline ";
100933965Sjdp	      else
101033965Sjdp		out_string = "\t.appline ";
101133965Sjdp	      PUT (*out_string++);
101233965Sjdp	      break;
101333965Sjdp	    }
101433965Sjdp
101538889Sjdp#ifdef TC_D10V
101638889Sjdp	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
101738889Sjdp	     Trap is the only short insn that has a first operand that is
101838889Sjdp	     neither register nor label.
101938889Sjdp	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
102038889Sjdp	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is already
102138889Sjdp	     LEX_IS_LINE_COMMENT_START.  However, it is the only character in
102238889Sjdp	     line_comment_chars for d10v, hence we can recognize it as such.  */
102338889Sjdp	  /* An alternative approach would be to reset the state to 1 when
102438889Sjdp	     we see '||', '<-' or '->', but that seems to be overkill.  */
102538889Sjdp	  if (state == 10) PUT (' ');
102638889Sjdp#endif
102733965Sjdp	  /* We have a line comment character which is not at the
102833965Sjdp	     start of a line.  If this is also a normal comment
102933965Sjdp	     character, fall through.  Otherwise treat it as a default
103033965Sjdp	     character.  */
103133965Sjdp	  if (strchr (tc_comment_chars, ch) == NULL
103233965Sjdp	      && (! scrub_m68k_mri
103333965Sjdp		  || (ch != '!' && ch != '*')))
103433965Sjdp	    goto de_fault;
103533965Sjdp	  if (scrub_m68k_mri
103633965Sjdp	      && (ch == '!' || ch == '*' || ch == '#')
103733965Sjdp	      && state != 1
103833965Sjdp	      && state != 10)
103933965Sjdp	    goto de_fault;
104033965Sjdp	  /* Fall through.  */
104133965Sjdp	case LEX_IS_COMMENT_START:
104233965Sjdp	  do
104333965Sjdp	    {
104433965Sjdp	      ch = GET ();
104533965Sjdp	    }
104633965Sjdp	  while (ch != EOF && !IS_NEWLINE (ch));
104733965Sjdp	  if (ch == EOF)
104833965Sjdp	    as_warn ("end of file in comment; newline inserted");
104933965Sjdp	  state = 0;
105033965Sjdp	  PUT ('\n');
105133965Sjdp	  break;
105233965Sjdp
105333965Sjdp	case LEX_IS_SYMBOL_COMPONENT:
105433965Sjdp	  if (state == 10)
105533965Sjdp	    {
105633965Sjdp	      /* This is a symbol character following another symbol
105733965Sjdp		 character, with whitespace in between.  We skipped
105833965Sjdp		 the whitespace earlier, so output it now.  */
105933965Sjdp	      UNGET (ch);
106033965Sjdp	      state = 3;
106133965Sjdp	      PUT (' ');
106233965Sjdp	      break;
106333965Sjdp	    }
106433965Sjdp
106533965Sjdp	  if (state == 3)
106633965Sjdp	    state = 9;
106733965Sjdp
106833965Sjdp	  /* This is a common case.  Quickly copy CH and all the
106933965Sjdp             following symbol component or normal characters.  */
107033965Sjdp	  if (to + 1 < toend && mri_state == NULL)
107133965Sjdp	    {
107233965Sjdp	      char *s;
107333965Sjdp	      int len;
107433965Sjdp
107533965Sjdp	      for (s = from; s < fromend; s++)
107633965Sjdp		{
107733965Sjdp		  int type;
107833965Sjdp
107933965Sjdp		  ch2 = *s;
108033965Sjdp		  type = lex[ch2];
108133965Sjdp		  if (type != 0
108233965Sjdp		      && type != LEX_IS_SYMBOL_COMPONENT)
108333965Sjdp		    break;
108433965Sjdp		}
108533965Sjdp	      if (s > from)
108633965Sjdp		{
108733965Sjdp		  /* Handle the last character normally, for
108833965Sjdp                     simplicity.  */
108933965Sjdp		  --s;
109033965Sjdp		}
109133965Sjdp	      len = s - from;
109233965Sjdp	      if (len > (toend - to) - 1)
109333965Sjdp		len = (toend - to) - 1;
109433965Sjdp	      if (len > 0)
109533965Sjdp		{
109633965Sjdp		  PUT (ch);
109733965Sjdp		  if (len > 8)
109833965Sjdp		    {
109933965Sjdp		      memcpy (to, from, len);
110033965Sjdp		      to += len;
110133965Sjdp		      from += len;
110233965Sjdp		    }
110333965Sjdp		  else
110433965Sjdp		    {
110533965Sjdp		      switch (len)
110633965Sjdp			{
110733965Sjdp			case 8: *to++ = *from++;
110833965Sjdp			case 7: *to++ = *from++;
110933965Sjdp			case 6: *to++ = *from++;
111033965Sjdp			case 5: *to++ = *from++;
111133965Sjdp			case 4: *to++ = *from++;
111233965Sjdp			case 3: *to++ = *from++;
111333965Sjdp			case 2: *to++ = *from++;
111433965Sjdp			case 1: *to++ = *from++;
111533965Sjdp			}
111633965Sjdp		    }
111733965Sjdp		  ch = GET ();
111833965Sjdp		}
111933965Sjdp	    }
112033965Sjdp
112133965Sjdp	  /* Fall through.  */
112233965Sjdp	default:
112333965Sjdp	de_fault:
112433965Sjdp	  /* Some relatively `normal' character.  */
112533965Sjdp	  if (state == 0)
112633965Sjdp	    {
112733965Sjdp	      state = 11;	/* Now seeing label definition */
112833965Sjdp	    }
112933965Sjdp	  else if (state == 1)
113033965Sjdp	    {
113133965Sjdp	      state = 2;	/* Ditto */
113233965Sjdp	    }
113333965Sjdp	  else if (state == 9)
113433965Sjdp	    {
113533965Sjdp	      if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
113633965Sjdp		state = 3;
113733965Sjdp	    }
113833965Sjdp	  else if (state == 10)
113933965Sjdp	    {
114033965Sjdp	      state = 3;
114133965Sjdp	    }
114233965Sjdp	  PUT (ch);
114333965Sjdp	  break;
114433965Sjdp	}
114533965Sjdp    }
114633965Sjdp
114733965Sjdp  /*NOTREACHED*/
114833965Sjdp
114933965Sjdp fromeof:
115033965Sjdp  /* We have reached the end of the input.  */
115133965Sjdp  return to - tostart;
115233965Sjdp
115333965Sjdp tofull:
115433965Sjdp  /* The output buffer is full.  Save any input we have not yet
115533965Sjdp     processed.  */
115633965Sjdp  if (fromend > from)
115733965Sjdp    {
115833965Sjdp      char *save;
115933965Sjdp
116033965Sjdp      save = (char *) xmalloc (fromend - from);
116133965Sjdp      memcpy (save, from, fromend - from);
116233965Sjdp      if (saved_input != NULL)
116333965Sjdp	free (saved_input);
116433965Sjdp      saved_input = save;
116533965Sjdp      saved_input_len = fromend - from;
116633965Sjdp    }
116733965Sjdp  else
116833965Sjdp    {
116933965Sjdp      if (saved_input != NULL)
117033965Sjdp	{
117133965Sjdp	  free (saved_input);
117233965Sjdp	  saved_input = NULL;
117333965Sjdp	}
117433965Sjdp    }
117533965Sjdp  return to - tostart;
117633965Sjdp}
117733965Sjdp
117833965Sjdp/* end of app.c */
1179