binutils/gas/app.c

/* This is the Assembler Pre-Processor
   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000
   Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.  */
33965Sjdp
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
   pair.  This needs better error-handling.  */
33965Sjdp
33965Sjdp#include <stdio.h>
33965Sjdp#include "as.h"			/* For BAD_CASE() only */
33965Sjdp
/* When the compiler does not claim strict ISO conformance, define
   `const' away (unless something already defined it) so the
   declarations below still parse.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;
#else
/* No m68k support configured: MRI mode can never be active, so make
   every test of scrub_m68k_mri a compile-time constant.  */
#define scrub_m68k_mri 0
#endif

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  The trailing '0' also stands for '1':
   the recognizer in do_scrub_chars accepts either digit there.  */
static const char mri_pseudo[] = ".mri 0";

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
/* Position reached in symver_pseudo while matching the input, or NULL
   when no match is in progress.  */
static const char * symver_state;
#endif
60484Sobrien
/* Character-class table used by the scrubber: lex[c] holds one of the
   LEX_IS_* values below, or 0 when C has no special class.  It is
   filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are always treated as part of a symbol.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif

/* Class predicates.  The argument indexes lex[] directly, so it must
   be in the range 0..255; callers must rule out EOF first.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
33965Sjdpstatic int process_escape PARAMS ((int));
33965Sjdp
33965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be
33965Sjdp   built statically at compile time rather than dynamically
77298Sobrien   each and every time the assembler is run.  xoxorich.  */
33965Sjdp
77298Sobrienvoid
33965Sjdpdo_scrub_begin (m68k_mri)
60484Sobrien     int m68k_mri ATTRIBUTE_UNUSED;
33965Sjdp{
33965Sjdp  const char *p;
60484Sobrien  int c;
33965Sjdp
33965Sjdp  lex[' '] = LEX_IS_WHITESPACE;
33965Sjdp  lex['\t'] = LEX_IS_WHITESPACE;
38889Sjdp  lex['\r'] = LEX_IS_WHITESPACE;
33965Sjdp  lex['\n'] = LEX_IS_NEWLINE;
33965Sjdp  lex[':'] = LEX_IS_COLON;
33965Sjdp
60484Sobrien#ifdef TC_M68K
60484Sobrien  scrub_m68k_mri = m68k_mri;
60484Sobrien
33965Sjdp  if (! m68k_mri)
60484Sobrien#endif
33965Sjdp    {
33965Sjdp      lex['"'] = LEX_IS_STRINGQUOTE;
33965Sjdp
60484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370)
60484Sobrien      /* I370 uses single-quotes to delimit integer, float constants */
33965Sjdp      lex['\''] = LEX_IS_ONECHAR_QUOTE;
33965Sjdp#endif
33965Sjdp
33965Sjdp#ifdef SINGLE_QUOTE_STRINGS
33965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
33965Sjdp#endif
33965Sjdp    }
33965Sjdp
33965Sjdp  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
33965Sjdp     in state 5 of do_scrub_chars must be changed.  */
33965Sjdp
33965Sjdp  /* Note that these override the previous defaults, e.g. if ';' is a
33965Sjdp     comment char, then it isn't a line separator.  */
33965Sjdp  for (p = symbol_chars; *p; ++p)
33965Sjdp    {
33965Sjdp      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
33965Sjdp    }				/* declare symbol characters */
33965Sjdp
60484Sobrien  for (c = 128; c < 256; ++c)
60484Sobrien    lex[c] = LEX_IS_SYMBOL_COMPONENT;
60484Sobrien
60484Sobrien#ifdef tc_symbol_chars
60484Sobrien  /* This macro permits the processor to specify all characters which
60484Sobrien     may appears in an operand.  This will prevent the scrubber from
60484Sobrien     discarding meaningful whitespace in certain cases.  The i386
60484Sobrien     backend uses this to support prefixes, which can confuse the
60484Sobrien     scrubber as to whether it is parsing operands or opcodes.  */
60484Sobrien  for (p = tc_symbol_chars; *p; ++p)
60484Sobrien    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
60484Sobrien#endif
60484Sobrien
33965Sjdp  /* The m68k backend wants to be able to change comment_chars.  */
33965Sjdp#ifndef tc_comment_chars
33965Sjdp#define tc_comment_chars comment_chars
33965Sjdp#endif
33965Sjdp  for (p = tc_comment_chars; *p; p++)
33965Sjdp    {
33965Sjdp      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
33965Sjdp    }				/* declare comment chars */
33965Sjdp
33965Sjdp  for (p = line_comment_chars; *p; p++)
33965Sjdp    {
33965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
33965Sjdp    }				/* declare line comment chars */
33965Sjdp
33965Sjdp  for (p = line_separator_chars; *p; p++)
33965Sjdp    {
33965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
33965Sjdp    }				/* declare line separators */
33965Sjdp
33965Sjdp  /* Only allow slash-star comments if slash is not in use.
33965Sjdp     FIXME: This isn't right.  We should always permit them.  */
33965Sjdp  if (lex['/'] == 0)
33965Sjdp    {
33965Sjdp      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
33965Sjdp    }
33965Sjdp
60484Sobrien#ifdef TC_M68K
33965Sjdp  if (m68k_mri)
33965Sjdp    {
33965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
33965Sjdp      lex[';'] = LEX_IS_COMMENT_START;
33965Sjdp      lex['*'] = LEX_IS_LINE_COMMENT_START;
33965Sjdp      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
33965Sjdp         then it can't be used in an expression.  */
33965Sjdp      lex['!'] = LEX_IS_LINE_COMMENT_START;
33965Sjdp    }
60484Sobrien#endif
38889Sjdp
38889Sjdp#ifdef TC_V850
38889Sjdp  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
38889Sjdp#endif
60484Sobrien#ifdef DOUBLEBAR_PARALLEL
38889Sjdp  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
38889Sjdp#endif
60484Sobrien#ifdef TC_D30V
60484Sobrien  /* must do this is we want VLIW instruction with "->" or "<-" */
60484Sobrien  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
60484Sobrien#endif
33965Sjdp}				/* do_scrub_begin() */
33965Sjdp
/* Saved state of the scrubber.  do_scrub_chars may return at any
   point, so everything it needs to resume lives at file scope.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to return to once a nested state (-1, -2 or 5) finishes.  */
static int old_state;
/* Remaining text to emit while in state -1.  */
static char *out_string;
/* Scratch buffer saved and restored together with out_string.
   NOTE(review): presumably the backing store for formatted output
   strings; its writer is not in the visible part of the file.  */
static char out_buf[20];
/* Count of newlines swallowed inside comments and continued strings.  */
static int add_newlines;
/* Unconsumed input left over from the previous do_scrub_chars call,
   or NULL if there is none; saved_input_len is its length.  */
static char *saved_input;
static int saved_input_len;
/* Buffer handed to the GET callback to receive raw input.  */
static char input_buffer[32 * 1024];
/* Position reached in mri_pseudo while matching the input, or NULL
   when no match is in progress.  */
static const char *mri_state;
/* Last character matched against mri_pseudo; '0' or '1' once the
   whole pseudo-op has been seen.  */
static char mri_last_ch;
33965Sjdp/* Data structure for saving the state of app across #include's.  Note that
33965Sjdp   app is called asynchronously to the parsing of the .include's, so our
33965Sjdp   state at the time .include is interpreted is completely unrelated.
33965Sjdp   That's why we have to save it all.  */
33965Sjdp
77298Sobrienstruct app_save {
77298Sobrien  int          state;
77298Sobrien  int          old_state;
77298Sobrien  char *       out_string;
77298Sobrien  char         out_buf[sizeof (out_buf)];
77298Sobrien  int          add_newlines;
77298Sobrien  char *       saved_input;
77298Sobrien  int          saved_input_len;
60484Sobrien#ifdef TC_M68K
77298Sobrien  int          scrub_m68k_mri;
60484Sobrien#endif
77298Sobrien  const char * mri_state;
77298Sobrien  char         mri_last_ch;
60484Sobrien#if defined TC_ARM && defined OBJ_ELF
77298Sobrien  const char * symver_state;
60484Sobrien#endif
77298Sobrien};
33965Sjdp
33965Sjdpchar *
33965Sjdpapp_push ()
33965Sjdp{
33965Sjdp  register struct app_save *saved;
33965Sjdp
33965Sjdp  saved = (struct app_save *) xmalloc (sizeof (*saved));
33965Sjdp  saved->state = state;
33965Sjdp  saved->old_state = old_state;
33965Sjdp  saved->out_string = out_string;
33965Sjdp  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
33965Sjdp  saved->add_newlines = add_newlines;
60484Sobrien  if (saved_input == NULL)
60484Sobrien    saved->saved_input = NULL;
60484Sobrien  else
60484Sobrien    {
60484Sobrien      saved->saved_input = xmalloc (saved_input_len);
60484Sobrien      memcpy (saved->saved_input, saved_input, saved_input_len);
60484Sobrien      saved->saved_input_len = saved_input_len;
60484Sobrien    }
60484Sobrien#ifdef TC_M68K
33965Sjdp  saved->scrub_m68k_mri = scrub_m68k_mri;
60484Sobrien#endif
33965Sjdp  saved->mri_state = mri_state;
33965Sjdp  saved->mri_last_ch = mri_last_ch;
60484Sobrien#if defined TC_ARM && defined OBJ_ELF
60484Sobrien  saved->symver_state = symver_state;
60484Sobrien#endif
33965Sjdp
77298Sobrien  /* do_scrub_begin() is not useful, just wastes time.  */
33965Sjdp
33965Sjdp  state = 0;
33965Sjdp  saved_input = NULL;
33965Sjdp
33965Sjdp  return (char *) saved;
33965Sjdp}
33965Sjdp
77298Sobrienvoid
33965Sjdpapp_pop (arg)
33965Sjdp     char *arg;
33965Sjdp{
33965Sjdp  register struct app_save *saved = (struct app_save *) arg;
33965Sjdp
77298Sobrien  /* There is no do_scrub_end ().  */
33965Sjdp  state = saved->state;
33965Sjdp  old_state = saved->old_state;
33965Sjdp  out_string = saved->out_string;
33965Sjdp  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
33965Sjdp  add_newlines = saved->add_newlines;
60484Sobrien  if (saved->saved_input == NULL)
60484Sobrien    saved_input = NULL;
60484Sobrien  else
60484Sobrien    {
60484Sobrien      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
60484Sobrien      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
60484Sobrien      saved_input = input_buffer;
60484Sobrien      saved_input_len = saved->saved_input_len;
60484Sobrien      free (saved->saved_input);
60484Sobrien    }
60484Sobrien#ifdef TC_M68K
33965Sjdp  scrub_m68k_mri = saved->scrub_m68k_mri;
60484Sobrien#endif
33965Sjdp  mri_state = saved->mri_state;
33965Sjdp  mri_last_ch = saved->mri_last_ch;
60484Sobrien#if defined TC_ARM && defined OBJ_ELF
60484Sobrien  symver_state = saved->symver_state;
60484Sobrien#endif
33965Sjdp
33965Sjdp  free (arg);
33965Sjdp}				/* app_pop() */
33965Sjdp
/* Translate the character following a backslash into the character
   the escape sequence denotes.

   CH is the character after the backslash.  The result is the control
   character for b, f, n, r and t; quote characters, and every
   character not listed, are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
33965Sjdp
33965Sjdp/* This function is called to process input characters.  The GET
33965Sjdp   parameter is used to retrieve more input characters.  GET should
33965Sjdp   set its parameter to point to a buffer, and return the length of
33965Sjdp   the buffer; it should return 0 at end of file.  The scrubbed output
33965Sjdp   characters are put into the buffer starting at TOSTART; the TOSTART
33965Sjdp   buffer is TOLEN bytes in length.  The function returns the number
33965Sjdp   of scrubbed characters put into TOSTART.  This will be TOLEN unless
33965Sjdp   end of file was seen.  This function is arranged as a state
33965Sjdp   machine, and saves its state so that it may return at any point.
33965Sjdp   This is the way the old code used to work.  */
33965Sjdp
33965Sjdpint
33965Sjdpdo_scrub_chars (get, tostart, tolen)
60484Sobrien     int (*get) PARAMS ((char *, int));
33965Sjdp     char *tostart;
33965Sjdp     int tolen;
33965Sjdp{
33965Sjdp  char *to = tostart;
33965Sjdp  char *toend = tostart + tolen;
33965Sjdp  char *from;
33965Sjdp  char *fromend;
33965Sjdp  int fromlen;
33965Sjdp  register int ch, ch2 = 0;
33965Sjdp
33965Sjdp  /*State 0: beginning of normal line
33965Sjdp	  1: After first whitespace on line (flush more white)
33965Sjdp	  2: After first non-white (opcode) on line (keep 1white)
33965Sjdp	  3: after second white on line (into operands) (flush white)
33965Sjdp	  4: after putting out a .line, put out digits
33965Sjdp	  5: parsing a string, then go to old-state
33965Sjdp	  6: putting out \ escape in a "d string.
33965Sjdp	  7: After putting out a .appfile, put out string.
33965Sjdp	  8: After putting out a .appfile string, flush until newline.
33965Sjdp	  9: After seeing symbol char in state 3 (keep 1white after symchar)
33965Sjdp	 10: After seeing whitespace in state 9 (keep white before symchar)
33965Sjdp	 11: After seeing a symbol character in state 0 (eg a label definition)
33965Sjdp	 -1: output string in out_string and go to the state in old_state
33965Sjdp	 -2: flush text until a '*' '/' is seen, then go to state old_state
38889Sjdp#ifdef TC_V850
38889Sjdp         12: After seeing a dash, looking for a second dash as a start of comment.
38889Sjdp#endif
60484Sobrien#ifdef DOUBLEBAR_PARALLEL
	 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
38889Sjdp#endif
33965Sjdp	  */
33965Sjdp
33965Sjdp  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
33965Sjdp     constructs like ``.loc 1 20''.  This was turning into ``.loc
33965Sjdp     120''.  States 9 and 10 ensure that a space is never dropped in
33965Sjdp     between characters which could appear in a identifier.  Ian
33965Sjdp     Taylor, ian@cygnus.com.
33965Sjdp
33965Sjdp     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
33965Sjdp     correctly on the PA (and any other target where colons are optional).
38889Sjdp     Jeff Law, law@cs.utah.edu.
33965Sjdp
38889Sjdp     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
38889Sjdp     get squashed into "cmp r1,r2||trap#1", with the all important space
38889Sjdp     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
38889Sjdp
33965Sjdp  /* This macro gets the next input character.  */
33965Sjdp
60484Sobrien#define GET()							\
60484Sobrien  (from < fromend						\
60484Sobrien   ? * (unsigned char *) (from++)				\
60484Sobrien   : (saved_input = NULL,					\
60484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer),	\
60484Sobrien      from = input_buffer,					\
60484Sobrien      fromend = from + fromlen,					\
60484Sobrien      (fromlen == 0						\
60484Sobrien       ? EOF							\
60484Sobrien       : * (unsigned char *) (from++))))
33965Sjdp
33965Sjdp  /* This macro pushes a character back on the input stream.  */
33965Sjdp
33965Sjdp#define UNGET(uch) (*--from = (uch))
33965Sjdp
33965Sjdp  /* This macro puts a character into the output buffer.  If this
33965Sjdp     character fills the output buffer, this macro jumps to the label
33965Sjdp     TOFULL.  We use this rather ugly approach because we need to
33965Sjdp     handle two different termination conditions: EOF on the input
33965Sjdp     stream, and a full output buffer.  It would be simpler if we
33965Sjdp     always read in the entire input stream before processing it, but
33965Sjdp     I don't want to make such a significant change to the assembler's
33965Sjdp     memory usage.  */
33965Sjdp
33965Sjdp#define PUT(pch)			\
33965Sjdp  do					\
33965Sjdp    {					\
33965Sjdp      *to++ = (pch);			\
33965Sjdp      if (to >= toend)			\
33965Sjdp        goto tofull;			\
33965Sjdp    }					\
33965Sjdp  while (0)
33965Sjdp
33965Sjdp  if (saved_input != NULL)
33965Sjdp    {
33965Sjdp      from = saved_input;
33965Sjdp      fromend = from + saved_input_len;
33965Sjdp    }
33965Sjdp  else
33965Sjdp    {
60484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer);
33965Sjdp      if (fromlen == 0)
33965Sjdp	return 0;
60484Sobrien      from = input_buffer;
33965Sjdp      fromend = from + fromlen;
33965Sjdp    }
33965Sjdp
33965Sjdp  while (1)
33965Sjdp    {
33965Sjdp      /* The cases in this switch end with continue, in order to
33965Sjdp         branch back to the top of this while loop and generate the
33965Sjdp         next output character in the appropriate state.  */
33965Sjdp      switch (state)
33965Sjdp	{
33965Sjdp	case -1:
33965Sjdp	  ch = *out_string++;
33965Sjdp	  if (*out_string == '\0')
33965Sjdp	    {
33965Sjdp	      state = old_state;
33965Sjdp	      old_state = 3;
33965Sjdp	    }
33965Sjdp	  PUT (ch);
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case -2:
33965Sjdp	  for (;;)
33965Sjdp	    {
33965Sjdp	      do
33965Sjdp		{
33965Sjdp		  ch = GET ();
33965Sjdp
33965Sjdp		  if (ch == EOF)
33965Sjdp		    {
60484Sobrien		      as_warn (_("end of file in comment"));
33965Sjdp		      goto fromeof;
33965Sjdp		    }
33965Sjdp
33965Sjdp		  if (ch == '\n')
33965Sjdp		    PUT ('\n');
33965Sjdp		}
33965Sjdp	      while (ch != '*');
33965Sjdp
33965Sjdp	      while ((ch = GET ()) == '*')
33965Sjdp		;
33965Sjdp
33965Sjdp	      if (ch == EOF)
33965Sjdp		{
60484Sobrien		  as_warn (_("end of file in comment"));
33965Sjdp		  goto fromeof;
33965Sjdp		}
33965Sjdp
33965Sjdp	      if (ch == '/')
33965Sjdp		break;
33965Sjdp
33965Sjdp	      UNGET (ch);
33965Sjdp	    }
33965Sjdp
33965Sjdp	  state = old_state;
33965Sjdp	  UNGET (' ');
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 4:
33965Sjdp	  ch = GET ();
33965Sjdp	  if (ch == EOF)
33965Sjdp	    goto fromeof;
33965Sjdp	  else if (ch >= '0' && ch <= '9')
33965Sjdp	    PUT (ch);
33965Sjdp	  else
33965Sjdp	    {
33965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch))
33965Sjdp		ch = GET ();
33965Sjdp	      if (ch == '"')
33965Sjdp		{
33965Sjdp		  UNGET (ch);
33965Sjdp		  if (scrub_m68k_mri)
33965Sjdp		    out_string = "\n\tappfile ";
33965Sjdp		  else
33965Sjdp		    out_string = "\n\t.appfile ";
33965Sjdp		  old_state = 7;
33965Sjdp		  state = -1;
33965Sjdp		  PUT (*out_string++);
33965Sjdp		}
33965Sjdp	      else
33965Sjdp		{
33965Sjdp		  while (ch != EOF && ch != '\n')
33965Sjdp		    ch = GET ();
33965Sjdp		  state = 0;
33965Sjdp		  PUT (ch);
33965Sjdp		}
33965Sjdp	    }
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 5:
33965Sjdp	  /* We are going to copy everything up to a quote character,
33965Sjdp             with special handling for a backslash.  We try to
33965Sjdp             optimize the copying in the simple case without using the
33965Sjdp             GET and PUT macros.  */
33965Sjdp	  {
33965Sjdp	    char *s;
33965Sjdp	    int len;
33965Sjdp
33965Sjdp	    for (s = from; s < fromend; s++)
33965Sjdp	      {
33965Sjdp		ch = *s;
33965Sjdp		/* This condition must be changed if the type of any
33965Sjdp                   other character can be LEX_IS_STRINGQUOTE.  */
33965Sjdp		if (ch == '\\'
33965Sjdp		    || ch == '"'
33965Sjdp		    || ch == '\''
33965Sjdp		    || ch == '\n')
33965Sjdp		  break;
33965Sjdp	      }
33965Sjdp	    len = s - from;
33965Sjdp	    if (len > toend - to)
33965Sjdp	      len = toend - to;
33965Sjdp	    if (len > 0)
33965Sjdp	      {
33965Sjdp		memcpy (to, from, len);
33965Sjdp		to += len;
33965Sjdp		from += len;
33965Sjdp	      }
33965Sjdp	  }
33965Sjdp
33965Sjdp	  ch = GET ();
33965Sjdp	  if (ch == EOF)
33965Sjdp	    {
60484Sobrien	      as_warn (_("end of file in string: inserted '\"'"));
33965Sjdp	      state = old_state;
33965Sjdp	      UNGET ('\n');
33965Sjdp	      PUT ('"');
33965Sjdp	    }
33965Sjdp	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
33965Sjdp	    {
33965Sjdp	      state = old_state;
33965Sjdp	      PUT (ch);
33965Sjdp	    }
33965Sjdp#ifndef NO_STRING_ESCAPES
33965Sjdp	  else if (ch == '\\')
33965Sjdp	    {
33965Sjdp	      state = 6;
33965Sjdp	      PUT (ch);
33965Sjdp	    }
33965Sjdp#endif
33965Sjdp	  else if (scrub_m68k_mri && ch == '\n')
33965Sjdp	    {
33965Sjdp	      /* Just quietly terminate the string.  This permits lines like
33965Sjdp		   bne	label	loop if we haven't reach end yet
33965Sjdp		 */
33965Sjdp	      state = old_state;
33965Sjdp	      UNGET (ch);
33965Sjdp	      PUT ('\'');
33965Sjdp	    }
33965Sjdp	  else
33965Sjdp	    {
33965Sjdp	      PUT (ch);
33965Sjdp	    }
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 6:
33965Sjdp	  state = 5;
33965Sjdp	  ch = GET ();
33965Sjdp	  switch (ch)
33965Sjdp	    {
33965Sjdp	      /* Handle strings broken across lines, by turning '\n' into
33965Sjdp		 '\\' and 'n'.  */
33965Sjdp	    case '\n':
33965Sjdp	      UNGET ('n');
33965Sjdp	      add_newlines++;
33965Sjdp	      PUT ('\\');
33965Sjdp	      continue;
33965Sjdp
33965Sjdp	    case '"':
33965Sjdp	    case '\\':
33965Sjdp	    case 'b':
33965Sjdp	    case 'f':
33965Sjdp	    case 'n':
33965Sjdp	    case 'r':
33965Sjdp	    case 't':
33965Sjdp	    case 'v':
33965Sjdp	    case 'x':
33965Sjdp	    case 'X':
33965Sjdp	    case '0':
33965Sjdp	    case '1':
33965Sjdp	    case '2':
33965Sjdp	    case '3':
33965Sjdp	    case '4':
33965Sjdp	    case '5':
33965Sjdp	    case '6':
33965Sjdp	    case '7':
33965Sjdp	      break;
33965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
33965Sjdp	    default:
60484Sobrien	      as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
33965Sjdp	      break;
33965Sjdp#else  /* ONLY_STANDARD_ESCAPES */
33965Sjdp	    default:
33965Sjdp	      /* Accept \x as x for any x */
33965Sjdp	      break;
33965Sjdp#endif /* ONLY_STANDARD_ESCAPES */
33965Sjdp
33965Sjdp	    case EOF:
60484Sobrien	      as_warn (_("End of file in string: '\"' inserted"));
33965Sjdp	      PUT ('"');
33965Sjdp	      continue;
33965Sjdp	    }
33965Sjdp	  PUT (ch);
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 7:
33965Sjdp	  ch = GET ();
33965Sjdp	  state = 5;
33965Sjdp	  old_state = 8;
33965Sjdp	  if (ch == EOF)
33965Sjdp	    goto fromeof;
33965Sjdp	  PUT (ch);
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 8:
33965Sjdp	  do
33965Sjdp	    ch = GET ();
33965Sjdp	  while (ch != '\n' && ch != EOF);
33965Sjdp	  if (ch == EOF)
33965Sjdp	    goto fromeof;
33965Sjdp	  state = 0;
33965Sjdp	  PUT (ch);
33965Sjdp	  continue;
33965Sjdp	}
33965Sjdp
33965Sjdp      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
33965Sjdp
33965Sjdp      /* flushchar: */
33965Sjdp      ch = GET ();
33965Sjdp
33965Sjdp    recycle:
33965Sjdp
60484Sobrien#if defined TC_ARM && defined OBJ_ELF
60484Sobrien      /* We need to watch out for .symver directives.  See the comment later
60484Sobrien	 in this function.  */
60484Sobrien      if (symver_state == NULL)
60484Sobrien	{
60484Sobrien	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
60484Sobrien	    symver_state = symver_pseudo + 1;
60484Sobrien	}
60484Sobrien      else
60484Sobrien	{
60484Sobrien	  /* We advance to the next state if we find the right
60484Sobrien	     character.  */
60484Sobrien	  if (ch != '\0' && (*symver_state == ch))
60484Sobrien	    ++symver_state;
60484Sobrien	  else if (*symver_state != '\0')
60484Sobrien	    /* We did not get the expected character, or we didn't
60484Sobrien	       get a valid terminating character after seeing the
60484Sobrien	       entire pseudo-op, so we must go back to the beginning.  */
60484Sobrien	    symver_state = NULL;
60484Sobrien	  else
60484Sobrien	    {
60484Sobrien	      /* We've read the entire pseudo-op.  If this is the end
60484Sobrien		 of the line, go back to the beginning.  */
60484Sobrien	      if (IS_NEWLINE (ch))
60484Sobrien		symver_state = NULL;
60484Sobrien	    }
60484Sobrien	}
60484Sobrien#endif /* TC_ARM && OBJ_ELF */
60484Sobrien
33965Sjdp#ifdef TC_M68K
33965Sjdp      /* We want to have pseudo-ops which control whether we are in
33965Sjdp         MRI mode or not.  Unfortunately, since m68k MRI mode affects
33965Sjdp         the scrubber, that means that we need a special purpose
33965Sjdp         recognizer here.  */
33965Sjdp      if (mri_state == NULL)
33965Sjdp	{
33965Sjdp	  if ((state == 0 || state == 1)
33965Sjdp	      && ch == mri_pseudo[0])
33965Sjdp	    mri_state = mri_pseudo + 1;
33965Sjdp	}
33965Sjdp      else
33965Sjdp	{
33965Sjdp	  /* We advance to the next state if we find the right
33965Sjdp	     character, or if we need a space character and we get any
33965Sjdp	     whitespace character, or if we need a '0' and we get a
33965Sjdp	     '1' (this is so that we only need one state to handle
33965Sjdp	     ``.mri 0'' and ``.mri 1'').  */
33965Sjdp	  if (ch != '\0'
33965Sjdp	      && (*mri_state == ch
33965Sjdp		  || (*mri_state == ' '
33965Sjdp		      && lex[ch] == LEX_IS_WHITESPACE)
33965Sjdp		  || (*mri_state == '0'
33965Sjdp		      && ch == '1')))
33965Sjdp	    {
33965Sjdp	      mri_last_ch = ch;
33965Sjdp	      ++mri_state;
33965Sjdp	    }
33965Sjdp	  else if (*mri_state != '\0'
33965Sjdp		   || (lex[ch] != LEX_IS_WHITESPACE
33965Sjdp		       && lex[ch] != LEX_IS_NEWLINE))
33965Sjdp	    {
33965Sjdp	      /* We did not get the expected character, or we didn't
33965Sjdp		 get a valid terminating character after seeing the
33965Sjdp		 entire pseudo-op, so we must go back to the
33965Sjdp		 beginning.  */
33965Sjdp	      mri_state = NULL;
33965Sjdp	    }
33965Sjdp	  else
33965Sjdp	    {
	      /* We've read the entire pseudo-op.  mri_last_ch is
                 either '0' or '1' indicating whether to enter or
                 leave MRI mode.  */
33965Sjdp	      do_scrub_begin (mri_last_ch == '1');
38889Sjdp	      mri_state = NULL;
33965Sjdp
33965Sjdp	      /* We continue handling the character as usual.  The
33965Sjdp                 main gas reader must also handle the .mri pseudo-op
33965Sjdp                 to control expression parsing and the like.  */
33965Sjdp	    }
33965Sjdp	}
33965Sjdp#endif
33965Sjdp
33965Sjdp      if (ch == EOF)
33965Sjdp	{
33965Sjdp	  if (state != 0)
33965Sjdp	    {
60484Sobrien	      as_warn (_("end of file not at end of a line; newline inserted"));
33965Sjdp	      state = 0;
33965Sjdp	      PUT ('\n');
33965Sjdp	    }
33965Sjdp	  goto fromeof;
33965Sjdp	}
33965Sjdp
33965Sjdp      switch (lex[ch])
33965Sjdp	{
33965Sjdp	case LEX_IS_WHITESPACE:
33965Sjdp	  do
33965Sjdp	    {
33965Sjdp	      ch = GET ();
33965Sjdp	    }
33965Sjdp	  while (ch != EOF && IS_WHITESPACE (ch));
33965Sjdp	  if (ch == EOF)
33965Sjdp	    goto fromeof;
33965Sjdp
33965Sjdp	  if (state == 0)
33965Sjdp	    {
33965Sjdp	      /* Preserve a single whitespace character at the
33965Sjdp		 beginning of a line.  */
33965Sjdp	      state = 1;
33965Sjdp	      UNGET (ch);
33965Sjdp	      PUT (' ');
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp
60484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
77298Sobrien	  if (lex[ch] == LEX_IS_COLON)
77298Sobrien	    {
77298Sobrien	      /* Only keep this white if there's no white *after* the
77298Sobrien                 colon.  */
77298Sobrien	      ch2 = GET ();
77298Sobrien	      UNGET (ch2);
77298Sobrien	      if (!IS_WHITESPACE (ch2))
77298Sobrien		{
77298Sobrien		  state = 9;
77298Sobrien		  UNGET (ch);
77298Sobrien		  PUT (' ');
77298Sobrien		  break;
77298Sobrien		}
77298Sobrien	    }
60484Sobrien#endif
33965Sjdp	  if (IS_COMMENT (ch)
33965Sjdp	      || ch == '/'
33965Sjdp	      || IS_LINE_SEPARATOR (ch))
33965Sjdp	    {
33965Sjdp	      if (scrub_m68k_mri)
33965Sjdp		{
33965Sjdp		  /* In MRI mode, we keep these spaces.  */
33965Sjdp		  UNGET (ch);
33965Sjdp		  PUT (' ');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      goto recycle;
33965Sjdp	    }
33965Sjdp
33965Sjdp	  /* If we're in state 2 or 11, we've seen a non-white
33965Sjdp	     character followed by whitespace.  If the next character
33965Sjdp	     is ':', this is whitespace after a label name which we
33965Sjdp	     normally must ignore.  In MRI mode, though, spaces are
33965Sjdp	     not permitted between the label and the colon.  */
33965Sjdp	  if ((state == 2 || state == 11)
33965Sjdp	      && lex[ch] == LEX_IS_COLON
33965Sjdp	      && ! scrub_m68k_mri)
33965Sjdp	    {
33965Sjdp	      state = 1;
33965Sjdp	      PUT (ch);
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp
33965Sjdp	  switch (state)
33965Sjdp	    {
33965Sjdp	    case 0:
33965Sjdp	      state++;
33965Sjdp	      goto recycle;	/* Punted leading sp */
33965Sjdp	    case 1:
33965Sjdp	      /* We can arrive here if we leave a leading whitespace
33965Sjdp		 character at the beginning of a line.  */
33965Sjdp	      goto recycle;
33965Sjdp	    case 2:
33965Sjdp	      state = 3;
33965Sjdp	      if (to + 1 < toend)
33965Sjdp		{
33965Sjdp		  /* Optimize common case by skipping UNGET/GET.  */
33965Sjdp		  PUT (' ');	/* Sp after opco */
33965Sjdp		  goto recycle;
33965Sjdp		}
33965Sjdp	      UNGET (ch);
33965Sjdp	      PUT (' ');
33965Sjdp	      break;
33965Sjdp	    case 3:
33965Sjdp	      if (scrub_m68k_mri)
33965Sjdp		{
33965Sjdp		  /* In MRI mode, we keep these spaces.  */
33965Sjdp		  UNGET (ch);
33965Sjdp		  PUT (' ');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      goto recycle;	/* Sp in operands */
33965Sjdp	    case 9:
33965Sjdp	    case 10:
33965Sjdp	      if (scrub_m68k_mri)
33965Sjdp		{
33965Sjdp		  /* In MRI mode, we keep these spaces.  */
33965Sjdp		  state = 3;
33965Sjdp		  UNGET (ch);
33965Sjdp		  PUT (' ');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      state = 10;	/* Sp after symbol char */
33965Sjdp	      goto recycle;
33965Sjdp	    case 11:
60484Sobrien	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
33965Sjdp		state = 1;
33965Sjdp	      else
33965Sjdp		{
33965Sjdp		  /* We know that ch is not ':', since we tested that
33965Sjdp                     case above.  Therefore this is not a label, so it
33965Sjdp                     must be the opcode, and we've just seen the
33965Sjdp                     whitespace after it.  */
33965Sjdp		  state = 3;
33965Sjdp		}
33965Sjdp	      UNGET (ch);
33965Sjdp	      PUT (' ');	/* Sp after label definition.  */
33965Sjdp	      break;
33965Sjdp	    default:
33965Sjdp	      BAD_CASE (state);
33965Sjdp	    }
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_TWOCHAR_COMMENT_1ST:
33965Sjdp	  ch2 = GET ();
33965Sjdp	  if (ch2 == '*')
33965Sjdp	    {
33965Sjdp	      for (;;)
33965Sjdp		{
33965Sjdp		  do
33965Sjdp		    {
33965Sjdp		      ch2 = GET ();
33965Sjdp		      if (ch2 != EOF && IS_NEWLINE (ch2))
33965Sjdp			add_newlines++;
33965Sjdp		    }
33965Sjdp		  while (ch2 != EOF && ch2 != '*');
33965Sjdp
33965Sjdp		  while (ch2 == '*')
33965Sjdp		    ch2 = GET ();
33965Sjdp
33965Sjdp		  if (ch2 == EOF || ch2 == '/')
33965Sjdp		    break;
33965Sjdp
33965Sjdp		  /* This UNGET will ensure that we count newlines
33965Sjdp                     correctly.  */
33965Sjdp		  UNGET (ch2);
33965Sjdp		}
33965Sjdp
33965Sjdp	      if (ch2 == EOF)
60484Sobrien		as_warn (_("end of file in multiline comment"));
33965Sjdp
33965Sjdp	      ch = ' ';
33965Sjdp	      goto recycle;
33965Sjdp	    }
77298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS
77298Sobrien	  else if (ch2 == '/')
77298Sobrien	    {
77298Sobrien	      do
77298Sobrien		{
77298Sobrien		  ch = GET ();
77298Sobrien		}
77298Sobrien	      while (ch != EOF && !IS_NEWLINE (ch));
77298Sobrien	      if (ch == EOF)
77298Sobrien		as_warn ("end of file in comment; newline inserted");
77298Sobrien	      state = 0;
77298Sobrien	      PUT ('\n');
77298Sobrien	      break;
77298Sobrien	    }
77298Sobrien#endif
33965Sjdp	  else
33965Sjdp	    {
33965Sjdp	      if (ch2 != EOF)
33965Sjdp		UNGET (ch2);
33965Sjdp	      if (state == 9 || state == 10)
33965Sjdp		state = 3;
33965Sjdp	      PUT (ch);
33965Sjdp	    }
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_STRINGQUOTE:
33965Sjdp	  if (state == 10)
33965Sjdp	    {
33965Sjdp	      /* Preserve the whitespace in foo "bar" */
33965Sjdp	      UNGET (ch);
33965Sjdp	      state = 3;
33965Sjdp	      PUT (' ');
33965Sjdp
33965Sjdp	      /* PUT didn't jump out.  We could just break, but we
33965Sjdp                 know what will happen, so optimize a bit.  */
33965Sjdp	      ch = GET ();
33965Sjdp	      old_state = 3;
33965Sjdp	    }
33965Sjdp	  else if (state == 9)
33965Sjdp	    old_state = 3;
33965Sjdp	  else
33965Sjdp	    old_state = state;
33965Sjdp	  state = 5;
33965Sjdp	  PUT (ch);
33965Sjdp	  break;
33965Sjdp
33965Sjdp#ifndef IEEE_STYLE
33965Sjdp	case LEX_IS_ONECHAR_QUOTE:
33965Sjdp	  if (state == 10)
33965Sjdp	    {
33965Sjdp	      /* Preserve the whitespace in foo 'b' */
33965Sjdp	      UNGET (ch);
33965Sjdp	      state = 3;
33965Sjdp	      PUT (' ');
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp	  ch = GET ();
33965Sjdp	  if (ch == EOF)
33965Sjdp	    {
60484Sobrien	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
33965Sjdp	      ch = 0;
33965Sjdp	    }
33965Sjdp	  if (ch == '\\')
33965Sjdp	    {
33965Sjdp	      ch = GET ();
33965Sjdp	      if (ch == EOF)
33965Sjdp		{
60484Sobrien		  as_warn (_("end of file in escape character"));
33965Sjdp		  ch = '\\';
33965Sjdp		}
33965Sjdp	      else
33965Sjdp		ch = process_escape (ch);
33965Sjdp	    }
33965Sjdp	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
33965Sjdp
33965Sjdp	  /* None of these 'x constants for us.  We want 'x'.  */
33965Sjdp	  if ((ch = GET ()) != '\'')
33965Sjdp	    {
33965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE
60484Sobrien	      as_warn (_("Missing close quote: (assumed)"));
33965Sjdp#else
33965Sjdp	      if (ch != EOF)
33965Sjdp		UNGET (ch);
33965Sjdp#endif
33965Sjdp	    }
33965Sjdp	  if (strlen (out_buf) == 1)
33965Sjdp	    {
33965Sjdp	      PUT (out_buf[0]);
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp	  if (state == 9)
33965Sjdp	    old_state = 3;
33965Sjdp	  else
33965Sjdp	    old_state = state;
33965Sjdp	  state = -1;
33965Sjdp	  out_string = out_buf;
33965Sjdp	  PUT (*out_string++);
33965Sjdp	  break;
33965Sjdp#endif
33965Sjdp
33965Sjdp	case LEX_IS_COLON:
60484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
77298Sobrien	  state = 9;
60484Sobrien#else
33965Sjdp	  if (state == 9 || state == 10)
33965Sjdp	    state = 3;
33965Sjdp	  else if (state != 3)
33965Sjdp	    state = 1;
60484Sobrien#endif
33965Sjdp	  PUT (ch);
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_NEWLINE:
33965Sjdp	  /* Roll out a bunch of newlines from inside comments, etc.  */
33965Sjdp	  if (add_newlines)
33965Sjdp	    {
33965Sjdp	      --add_newlines;
33965Sjdp	      UNGET (ch);
33965Sjdp	    }
77298Sobrien	  /* Fall through.  */
33965Sjdp
33965Sjdp	case LEX_IS_LINE_SEPARATOR:
33965Sjdp	  state = 0;
33965Sjdp	  PUT (ch);
33965Sjdp	  break;
33965Sjdp
38889Sjdp#ifdef TC_V850
38889Sjdp	case LEX_IS_DOUBLEDASH_1ST:
77298Sobrien	  ch2 = GET ();
38889Sjdp	  if (ch2 != '-')
38889Sjdp	    {
38889Sjdp	      UNGET (ch2);
38889Sjdp	      goto de_fault;
38889Sjdp	    }
77298Sobrien	  /* Read and skip to end of line.  */
38889Sjdp	  do
38889Sjdp	    {
38889Sjdp	      ch = GET ();
38889Sjdp	    }
38889Sjdp	  while (ch != EOF && ch != '\n');
38889Sjdp	  if (ch == EOF)
38889Sjdp	    {
60484Sobrien	      as_warn (_("end of file in comment; newline inserted"));
38889Sjdp	    }
38889Sjdp	  state = 0;
38889Sjdp	  PUT ('\n');
38889Sjdp	  break;
77298Sobrien#endif
60484Sobrien#ifdef DOUBLEBAR_PARALLEL
38889Sjdp	case LEX_IS_DOUBLEBAR_1ST:
77298Sobrien	  ch2 = GET ();
38889Sjdp	  if (ch2 != '|')
38889Sjdp	    {
38889Sjdp	      UNGET (ch2);
38889Sjdp	      goto de_fault;
38889Sjdp	    }
38889Sjdp	  /* Reset back to state 1 and pretend that we are parsing a line from
38889Sjdp	     just after the first white space.  */
38889Sjdp	  state = 1;
38889Sjdp	  PUT ('|');
38889Sjdp	  PUT ('|');
38889Sjdp	  break;
77298Sobrien#endif
33965Sjdp	case LEX_IS_LINE_COMMENT_START:
33965Sjdp	  /* FIXME-someday: The two character comment stuff was badly
33965Sjdp	     thought out.  On i386, we want '/' as line comment start
33965Sjdp	     AND we want C style comments.  hence this hack.  The
33965Sjdp	     whole lexical process should be reworked.  xoxorich.  */
33965Sjdp	  if (ch == '/')
33965Sjdp	    {
33965Sjdp	      ch2 = GET ();
33965Sjdp	      if (ch2 == '*')
33965Sjdp		{
33965Sjdp		  old_state = 3;
33965Sjdp		  state = -2;
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      else
33965Sjdp		{
33965Sjdp		  UNGET (ch2);
33965Sjdp		}
33965Sjdp	    } /* bad hack */
33965Sjdp
33965Sjdp	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
33965Sjdp	    {
33965Sjdp	      int startch;
33965Sjdp
33965Sjdp	      startch = ch;
33965Sjdp
33965Sjdp	      do
33965Sjdp		{
33965Sjdp		  ch = GET ();
33965Sjdp		}
33965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch));
33965Sjdp	      if (ch == EOF)
33965Sjdp		{
60484Sobrien		  as_warn (_("end of file in comment; newline inserted"));
33965Sjdp		  PUT ('\n');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
33965Sjdp		{
33965Sjdp		  /* Not a cpp line.  */
33965Sjdp		  while (ch != EOF && !IS_NEWLINE (ch))
33965Sjdp		    ch = GET ();
33965Sjdp		  if (ch == EOF)
60484Sobrien		    as_warn (_("EOF in Comment: Newline inserted"));
33965Sjdp		  state = 0;
33965Sjdp		  PUT ('\n');
33965Sjdp		  break;
33965Sjdp		}
77298Sobrien	      /* Looks like `# 123 "filename"' from cpp.  */
33965Sjdp	      UNGET (ch);
33965Sjdp	      old_state = 4;
33965Sjdp	      state = -1;
33965Sjdp	      if (scrub_m68k_mri)
33965Sjdp		out_string = "\tappline ";
33965Sjdp	      else
33965Sjdp		out_string = "\t.appline ";
33965Sjdp	      PUT (*out_string++);
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp
38889Sjdp#ifdef TC_D10V
38889Sjdp	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
38889Sjdp	     Trap is the only short insn that has a first operand that is
38889Sjdp	     neither register nor label.
38889Sjdp	     We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
77298Sobrien	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
77298Sobrien	     already LEX_IS_LINE_COMMENT_START.  However, it is the
77298Sobrien	     only character in line_comment_chars for d10v, hence we
77298Sobrien	     can recognize it as such.  */
38889Sjdp	  /* An alternative approach would be to reset the state to 1 when
38889Sjdp	     we see '||', '<-' or '->', but that seems to be overkill.  */
77298Sobrien	  if (state == 10)
77298Sobrien	    PUT (' ');
38889Sjdp#endif
33965Sjdp	  /* We have a line comment character which is not at the
33965Sjdp	     start of a line.  If this is also a normal comment
33965Sjdp	     character, fall through.  Otherwise treat it as a default
33965Sjdp	     character.  */
33965Sjdp	  if (strchr (tc_comment_chars, ch) == NULL
33965Sjdp	      && (! scrub_m68k_mri
33965Sjdp		  || (ch != '!' && ch != '*')))
33965Sjdp	    goto de_fault;
33965Sjdp	  if (scrub_m68k_mri
33965Sjdp	      && (ch == '!' || ch == '*' || ch == '#')
33965Sjdp	      && state != 1
33965Sjdp	      && state != 10)
33965Sjdp	    goto de_fault;
33965Sjdp	  /* Fall through.  */
33965Sjdp	case LEX_IS_COMMENT_START:
60484Sobrien#if defined TC_ARM && defined OBJ_ELF
60484Sobrien	  /* On the ARM, `@' is the comment character.
60484Sobrien	     Unfortunately this is also a special character in ELF .symver
77298Sobrien	     directives (and .type, though we deal with those another way).
77298Sobrien	     So we check if this line is such a directive, and treat
77298Sobrien	     the character as default if so.  This is a hack.  */
60484Sobrien	  if ((symver_state != NULL) && (*symver_state == 0))
60484Sobrien	    goto de_fault;
60484Sobrien#endif
77298Sobrien#ifdef WARN_COMMENTS
77298Sobrien	  if (!found_comment)
77298Sobrien	    as_where (&found_comment_file, &found_comment);
77298Sobrien#endif
33965Sjdp	  do
33965Sjdp	    {
33965Sjdp	      ch = GET ();
33965Sjdp	    }
33965Sjdp	  while (ch != EOF && !IS_NEWLINE (ch));
33965Sjdp	  if (ch == EOF)
60484Sobrien	    as_warn (_("end of file in comment; newline inserted"));
33965Sjdp	  state = 0;
33965Sjdp	  PUT ('\n');
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_SYMBOL_COMPONENT:
33965Sjdp	  if (state == 10)
33965Sjdp	    {
33965Sjdp	      /* This is a symbol character following another symbol
33965Sjdp		 character, with whitespace in between.  We skipped
33965Sjdp		 the whitespace earlier, so output it now.  */
33965Sjdp	      UNGET (ch);
33965Sjdp	      state = 3;
33965Sjdp	      PUT (' ');
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp
33965Sjdp	  if (state == 3)
33965Sjdp	    state = 9;
33965Sjdp
33965Sjdp	  /* This is a common case.  Quickly copy CH and all the
33965Sjdp             following symbol component or normal characters.  */
60484Sobrien	  if (to + 1 < toend
60484Sobrien	      && mri_state == NULL
60484Sobrien#if defined TC_ARM && defined OBJ_ELF
60484Sobrien	      && symver_state == NULL
60484Sobrien#endif
60484Sobrien	      )
33965Sjdp	    {
33965Sjdp	      char *s;
33965Sjdp	      int len;
33965Sjdp
33965Sjdp	      for (s = from; s < fromend; s++)
33965Sjdp		{
33965Sjdp		  int type;
33965Sjdp
77298Sobrien		  ch2 = *(unsigned char *) s;
33965Sjdp		  type = lex[ch2];
33965Sjdp		  if (type != 0
33965Sjdp		      && type != LEX_IS_SYMBOL_COMPONENT)
33965Sjdp		    break;
33965Sjdp		}
33965Sjdp	      if (s > from)
33965Sjdp		{
33965Sjdp		  /* Handle the last character normally, for
33965Sjdp                     simplicity.  */
33965Sjdp		  --s;
33965Sjdp		}
33965Sjdp	      len = s - from;
33965Sjdp	      if (len > (toend - to) - 1)
33965Sjdp		len = (toend - to) - 1;
33965Sjdp	      if (len > 0)
33965Sjdp		{
33965Sjdp		  PUT (ch);
33965Sjdp		  if (len > 8)
33965Sjdp		    {
33965Sjdp		      memcpy (to, from, len);
33965Sjdp		      to += len;
33965Sjdp		      from += len;
33965Sjdp		    }
33965Sjdp		  else
33965Sjdp		    {
33965Sjdp		      switch (len)
33965Sjdp			{
33965Sjdp			case 8: *to++ = *from++;
33965Sjdp			case 7: *to++ = *from++;
33965Sjdp			case 6: *to++ = *from++;
33965Sjdp			case 5: *to++ = *from++;
33965Sjdp			case 4: *to++ = *from++;
33965Sjdp			case 3: *to++ = *from++;
33965Sjdp			case 2: *to++ = *from++;
33965Sjdp			case 1: *to++ = *from++;
33965Sjdp			}
77298Sobrien		    }
33965Sjdp		  ch = GET ();
33965Sjdp		}
33965Sjdp	    }
33965Sjdp
33965Sjdp	  /* Fall through.  */
33965Sjdp	default:
33965Sjdp	de_fault:
33965Sjdp	  /* Some relatively `normal' character.  */
33965Sjdp	  if (state == 0)
33965Sjdp	    {
33965Sjdp	      state = 11;	/* Now seeing label definition */
33965Sjdp	    }
33965Sjdp	  else if (state == 1)
33965Sjdp	    {
33965Sjdp	      state = 2;	/* Ditto */
33965Sjdp	    }
33965Sjdp	  else if (state == 9)
33965Sjdp	    {
33965Sjdp	      if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
33965Sjdp		state = 3;
33965Sjdp	    }
33965Sjdp	  else if (state == 10)
33965Sjdp	    {
60484Sobrien	      if (ch == '\\')
60484Sobrien		{
60484Sobrien		  /* Special handling for backslash: a backslash may
60484Sobrien		     be the beginning of a formal parameter (of a
60484Sobrien		     macro) following another symbol character, with
60484Sobrien		     whitespace in between.  If that is the case, we
60484Sobrien		     output a space before the parameter.  Strictly
60484Sobrien		     speaking, correct handling depends upon what the
60484Sobrien		     macro parameter expands into; if the parameter
60484Sobrien		     expands into something which does not start with
60484Sobrien		     an operand character, then we don't want to keep
60484Sobrien		     the space.  We don't have enough information to
60484Sobrien		     make the right choice, so here we are making the
60484Sobrien		     choice which is more likely to be correct.  */
60484Sobrien		  PUT (' ');
60484Sobrien		}
60484Sobrien
33965Sjdp	      state = 3;
33965Sjdp	    }
33965Sjdp	  PUT (ch);
33965Sjdp	  break;
33965Sjdp	}
33965Sjdp    }
33965Sjdp
33965Sjdp  /*NOTREACHED*/
33965Sjdp
33965Sjdp fromeof:
33965Sjdp  /* We have reached the end of the input.  */
33965Sjdp  return to - tostart;
33965Sjdp
33965Sjdp tofull:
33965Sjdp  /* The output buffer is full.  Save any input we have not yet
33965Sjdp     processed.  */
33965Sjdp  if (fromend > from)
33965Sjdp    {
60484Sobrien      saved_input = from;
33965Sjdp      saved_input_len = fromend - from;
33965Sjdp    }
33965Sjdp  else
60484Sobrien    saved_input = NULL;
60484Sobrien
33965Sjdp  return to - tostart;
33965Sjdp}
33965Sjdp
33965Sjdp/* end of app.c */