binutils/gas/app.c

33965Sjdp/* This is the Assembler Pre-Processor
33965Sjdp   Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 1997
33965Sjdp   Free Software Foundation, Inc.
33965Sjdp
33965Sjdp   This file is part of GAS, the GNU Assembler.
33965Sjdp
33965Sjdp   GAS is free software; you can redistribute it and/or modify
33965Sjdp   it under the terms of the GNU General Public License as published by
33965Sjdp   the Free Software Foundation; either version 2, or (at your option)
33965Sjdp   any later version.
33965Sjdp
33965Sjdp   GAS is distributed in the hope that it will be useful,
33965Sjdp   but WITHOUT ANY WARRANTY; without even the implied warranty of
33965Sjdp   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33965Sjdp   GNU General Public License for more details.
33965Sjdp
33965Sjdp   You should have received a copy of the GNU General Public License
33965Sjdp   along with GAS; see the file COPYING.  If not, write to the Free
33965Sjdp   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
33965Sjdp   02111-1307, USA.  */
33965Sjdp
33965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
33965Sjdp/* App, the assembler pre-processor.  This pre-processor strips out excess
33965Sjdp   spaces, turns single-quoted characters into a decimal constant, and turns
33965Sjdp   # <number> <filename> <garbage> into a .appline <number>\n.appfile
33965Sjdp   <filename> pair.  This needs better error-handling.  */
33965Sjdp
33965Sjdp#include <stdio.h>
33965Sjdp#include "as.h"			/* For BAD_CASE() only */
33965Sjdp
33965Sjdp#if (__STDC__ != 1)	/* __STDC__ is not defined as 1.  */
33965Sjdp#ifndef const
33965Sjdp#define const  /* empty: the `const' keyword expands to nothing */
33965Sjdp#endif /* ! const */
33965Sjdp#endif /* __STDC__ != 1 */
33965Sjdp
33965Sjdp/* Whether we are scrubbing in m68k MRI mode.  This is different from
33965Sjdp   flag_m68k_mri, because the two flags will be affected by the .mri
33965Sjdp   pseudo-op at different times.  Set by do_scrub_begin and restored
33965Sjdp   by app_pop.  */
33965Sjdpstatic int scrub_m68k_mri;
33965Sjdp
33965Sjdp/* The pseudo-op which switches in and out of MRI mode.  See the
33965Sjdp   comment in do_scrub_chars.  The matcher there also accepts '1' where
33965Sjdp   this template has '0', and any whitespace where it has ' '.  */
33965Sjdpstatic const char mri_pseudo[] = ".mri 0";
33965Sjdp
33965Sjdpstatic char lex[256];		/* Class of each character code: 0 (ordinary)
33965Sjdp				   or one of the LEX_IS_* values below.
33965Sjdp				   Filled in by do_scrub_begin.  */
33965Sjdpstatic const char symbol_chars[] =	/* Marked LEX_IS_SYMBOL_COMPONENT.  */
33965Sjdp"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
33965Sjdp/* Character classes stored in lex[].  An entry that is still 0 marks
33965Sjdp   an ordinary character with no special class.  The value 7 is not
33965Sjdp   used.  The IS_* macros test the class of a character; their
33965Sjdp   argument indexes lex[] directly, so it must be a valid index (in
33965Sjdp   particular it must not be EOF).  */
33965Sjdp#define LEX_IS_SYMBOL_COMPONENT		1
33965Sjdp#define LEX_IS_WHITESPACE		2
33965Sjdp#define LEX_IS_LINE_SEPARATOR		3
33965Sjdp#define LEX_IS_COMMENT_START		4
33965Sjdp#define LEX_IS_LINE_COMMENT_START	5
33965Sjdp#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
33965Sjdp#define	LEX_IS_STRINGQUOTE		8
33965Sjdp#define	LEX_IS_COLON			9
33965Sjdp#define	LEX_IS_NEWLINE			10
33965Sjdp#define	LEX_IS_ONECHAR_QUOTE		11
33965Sjdp#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
33965Sjdp#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
33965Sjdp#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
33965Sjdp#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
33965Sjdp#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
33965Sjdp#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
33965Sjdp
33965Sjdpstatic int process_escape PARAMS ((int));
33965Sjdp
33965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be
33965Sjdp   built statically at compile time rather than dynamically
33965Sjdp   each and every time the assembler is run.  xoxorich. */
33965Sjdp
33965Sjdp/* Build the character classification table lex[] used by
33965Sjdp   do_scrub_chars, and record in scrub_m68k_mri whether the scrubber
33965Sjdp   runs in m68k MRI mode.  M68K_MRI is nonzero to select MRI mode.
33965Sjdp
33965Sjdp   do_scrub_chars calls this again each time it recognizes a .mri
33965Sjdp   pseudo-op, so the table is rebuilt from scratch on every call.
33965Sjdp   Otherwise classes assigned for the previous mode (for example '*'
33965Sjdp   and '!' as MRI line comment characters, or '"' as a string quote)
33965Sjdp   would survive the mode switch.  */
33965Sjdpvoid
33965Sjdpdo_scrub_begin (m68k_mri)
33965Sjdp     int m68k_mri;
33965Sjdp{
33965Sjdp  const char *p;
33965Sjdp
33965Sjdp  scrub_m68k_mri = m68k_mri;
33965Sjdp
33965Sjdp  /* Forget whatever an earlier call left in the table.  */
33965Sjdp  memset (lex, 0, sizeof (lex));
33965Sjdp
33965Sjdp  /* Defaults; the loops below may override them.  */
33965Sjdp  lex[' '] = LEX_IS_WHITESPACE;
33965Sjdp  lex['\t'] = LEX_IS_WHITESPACE;
33965Sjdp  lex['\n'] = LEX_IS_NEWLINE;
33965Sjdp  lex[';'] = LEX_IS_LINE_SEPARATOR;
33965Sjdp  lex[':'] = LEX_IS_COLON;
33965Sjdp
33965Sjdp  if (! m68k_mri)
33965Sjdp    {
33965Sjdp      lex['"'] = LEX_IS_STRINGQUOTE;
33965Sjdp
33965Sjdp#ifndef TC_HPPA
33965Sjdp      lex['\''] = LEX_IS_ONECHAR_QUOTE;
33965Sjdp#endif
33965Sjdp
33965Sjdp#ifdef SINGLE_QUOTE_STRINGS
33965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
33965Sjdp#endif
33965Sjdp    }
33965Sjdp
33965Sjdp  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
33965Sjdp     in state 5 of do_scrub_chars must be changed.  */
33965Sjdp
33965Sjdp  /* Note that these override the previous defaults, e.g. if ';' is a
33965Sjdp     comment char, then it isn't a line separator.  */
33965Sjdp  for (p = symbol_chars; *p; ++p)
33965Sjdp    {
33965Sjdp      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
33965Sjdp    }				/* declare symbol characters */
33965Sjdp
33965Sjdp  /* The m68k backend wants to be able to change comment_chars.  */
33965Sjdp#ifndef tc_comment_chars
33965Sjdp#define tc_comment_chars comment_chars
33965Sjdp#endif
33965Sjdp  for (p = tc_comment_chars; *p; p++)
33965Sjdp    {
33965Sjdp      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
33965Sjdp    }				/* declare comment chars */
33965Sjdp
33965Sjdp  for (p = line_comment_chars; *p; p++)
33965Sjdp    {
33965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
33965Sjdp    }				/* declare line comment chars */
33965Sjdp
33965Sjdp  for (p = line_separator_chars; *p; p++)
33965Sjdp    {
33965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
33965Sjdp    }				/* declare line separators */
33965Sjdp
33965Sjdp  /* Only allow slash-star comments if slash is not in use.
33965Sjdp     FIXME: This isn't right.  We should always permit them.  */
33965Sjdp  if (lex['/'] == 0)
33965Sjdp    {
33965Sjdp      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
33965Sjdp    }
33965Sjdp
33965Sjdp  /* The MRI settings come last so that they win over everything the
33965Sjdp     target tables set above.  */
33965Sjdp  if (m68k_mri)
33965Sjdp    {
33965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
33965Sjdp      lex[';'] = LEX_IS_COMMENT_START;
33965Sjdp      lex['*'] = LEX_IS_LINE_COMMENT_START;
33965Sjdp      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
33965Sjdp         then it can't be used in an expression.  */
33965Sjdp      lex['!'] = LEX_IS_LINE_COMMENT_START;
33965Sjdp    }
33965Sjdp}				/* do_scrub_begin() */
33965Sjdp
33965Sjdp/* Saved state of the scrubber.  These keep their values between calls
33965Sjdp   of do_scrub_chars; app_push and app_pop save and restore them.  */
33965Sjdpstatic int state;		/* Current do_scrub_chars state.  */
33965Sjdpstatic int old_state;		/* State to go to when the current one is done.  */
33965Sjdpstatic char *out_string;	/* Rest of the text being emitted in state -1.  */
33965Sjdpstatic char out_buf[20];	/* Decimal text of a character constant.  */
33965Sjdpstatic int add_newlines;	/* Newlines swallowed and still to be emitted.  */
33965Sjdpstatic char *saved_input;	/* Input to process first on the next call, or
33965Sjdp				   NULL; freed by GET once it is used up.  */
33965Sjdpstatic int saved_input_len;	/* Number of bytes at saved_input.  */
33965Sjdpstatic const char *mri_state;	/* Next mri_pseudo character to match, or NULL.  */
33965Sjdpstatic char mri_last_ch;	/* Last input character matched against mri_pseudo.  */
33965Sjdp
33965Sjdp/* Data structure for saving the state of app across #include's.  Note that
33965Sjdp   app is called asynchronously to the parsing of the .include's, so our
33965Sjdp   state at the time .include is interpreted is completely unrelated.
33965Sjdp   That's why we have to save it all.
33965Sjdp
33965Sjdp   Each member holds the value of the file-scope variable of the same
33965Sjdp   name.  app_push allocates and fills one of these; app_pop copies it
33965Sjdp   back and frees it.  */
33965Sjdp
33965Sjdpstruct app_save
33965Sjdp  {
33965Sjdp    int state;
33965Sjdp    int old_state;
33965Sjdp    char *out_string;
33965Sjdp    char out_buf[sizeof (out_buf)];
33965Sjdp    int add_newlines;
33965Sjdp    char *saved_input;
33965Sjdp    int saved_input_len;
33965Sjdp    int scrub_m68k_mri;
33965Sjdp    const char *mri_state;
33965Sjdp    char mri_last_ch;
33965Sjdp  };
33965Sjdp
33965Sjdp/* Save the complete scrubber state in a newly allocated struct
33965Sjdp   app_save, then reset the scrubber so that it starts on a new input
33965Sjdp   source at the beginning of a line.  Returns the saved state as an
33965Sjdp   opaque pointer which the caller later hands to app_pop.  */
33965Sjdpchar *
33965Sjdpapp_push ()
33965Sjdp{
33965Sjdp  register struct app_save *saved;
33965Sjdp
33965Sjdp  saved = (struct app_save *) xmalloc (sizeof (*saved));
33965Sjdp  saved->state = state;
33965Sjdp  saved->old_state = old_state;
33965Sjdp  saved->out_string = out_string;
33965Sjdp  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
33965Sjdp  saved->add_newlines = add_newlines;
33965Sjdp  /* The pending input buffer, if any, now belongs to SAVED until
33965Sjdp     app_pop puts the pointer back.  */
33965Sjdp  saved->saved_input = saved_input;
33965Sjdp  saved->saved_input_len = saved_input_len;
33965Sjdp  saved->scrub_m68k_mri = scrub_m68k_mri;
33965Sjdp  saved->mri_state = mri_state;
33965Sjdp  saved->mri_last_ch = mri_last_ch;
33965Sjdp
33965Sjdp  /* do_scrub_begin() is not useful, just wastes time. */
33965Sjdp
33965Sjdp  state = 0;
33965Sjdp  saved_input = NULL;
33965Sjdp  /* Newlines still owed to the interrupted input are recorded in SAVED
33965Sjdp     and come back in app_pop.  They must not also be emitted into the
33965Sjdp     output of the new input, or they would be counted twice.  */
33965Sjdp  add_newlines = 0;
33965Sjdp
33965Sjdp  return (char *) saved;
33965Sjdp}
33965Sjdp
33965Sjdp/* Reinstate the scrubber state captured by app_push.  ARG is the
33965Sjdp   pointer app_push returned.  The structure it refers to is freed
33965Sjdp   here, so ARG must not be used afterwards.  There is no
33965Sjdp   do_scrub_end () to call.  */
33965Sjdpvoid
33965Sjdpapp_pop (arg)
33965Sjdp     char *arg;
33965Sjdp{
33965Sjdp  struct app_save *snapshot;
33965Sjdp
33965Sjdp  snapshot = (struct app_save *) arg;
33965Sjdp
33965Sjdp  /* Position in the state machine.  */
33965Sjdp  old_state = snapshot->old_state;
33965Sjdp  state = snapshot->state;
33965Sjdp
33965Sjdp  /* Output that was still pending.  */
33965Sjdp  memcpy (out_buf, snapshot->out_buf, sizeof (out_buf));
33965Sjdp  out_string = snapshot->out_string;
33965Sjdp  add_newlines = snapshot->add_newlines;
33965Sjdp
33965Sjdp  /* Input that was still pending.  */
33965Sjdp  saved_input_len = snapshot->saved_input_len;
33965Sjdp  saved_input = snapshot->saved_input;
33965Sjdp
33965Sjdp  /* MRI mode and the progress of the .mri recognizer.  */
33965Sjdp  mri_last_ch = snapshot->mri_last_ch;
33965Sjdp  mri_state = snapshot->mri_state;
33965Sjdp  scrub_m68k_mri = snapshot->scrub_m68k_mri;
33965Sjdp
33965Sjdp  free (arg);
33965Sjdp}				/* app_pop() */
33965Sjdp
33965Sjdp/* Translate CH, the character that followed a backslash, into the
33965Sjdp   character which that escape stands for.  A character which is not
33965Sjdp   one of the escape letters listed below is returned unchanged.
33965Sjdp
33965Sjdp   @@ This assumes that \n &c are the same on host and target.  This is
33965Sjdp   not necessarily true.  */
33965Sjdpstatic int
33965Sjdpprocess_escape (ch)
33965Sjdp     int ch;
33965Sjdp{
33965Sjdp  /* Escape letter and the character it produces.  */
33965Sjdp  static const struct
33965Sjdp    {
33965Sjdp      char letter;
33965Sjdp      char value;
33965Sjdp    }
33965Sjdp  escapes[] =
33965Sjdp    {
33965Sjdp      { 'b', '\b' },
33965Sjdp      { 'f', '\f' },
33965Sjdp      { 'n', '\n' },
33965Sjdp      { 'r', '\r' },
33965Sjdp      { 't', '\t' },
33965Sjdp      { '\'', '\'' },
33965Sjdp      { '"', '\"' }
33965Sjdp    };
33965Sjdp  unsigned int i;
33965Sjdp
33965Sjdp  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
33965Sjdp    if (ch == escapes[i].letter)
33965Sjdp      return escapes[i].value;
33965Sjdp
33965Sjdp  /* Anything else stands for itself.  */
33965Sjdp  return ch;
33965Sjdp}
33965Sjdp
33965Sjdp/* This function is called to process input characters.  The GET
33965Sjdp   parameter is used to retrieve more input characters.  GET should
33965Sjdp   set its parameter to point to a buffer, and return the length of
33965Sjdp   the buffer; it should return 0 at end of file.  The scrubbed output
33965Sjdp   characters are put into the buffer starting at TOSTART; the TOSTART
33965Sjdp   buffer is TOLEN bytes in length.  The function returns the number
33965Sjdp   of scrubbed characters put into TOSTART.  This will be TOLEN unless
33965Sjdp   end of file was seen.  This function is arranged as a state
33965Sjdp   machine, and saves its state so that it may return at any point.
33965Sjdp   This is the way the old code used to work.  */
33965Sjdp
33965Sjdpint
33965Sjdpdo_scrub_chars (get, tostart, tolen)
33965Sjdp     int (*get) PARAMS ((char **));
33965Sjdp     char *tostart;
33965Sjdp     int tolen;
33965Sjdp{
33965Sjdp  char *to = tostart;
33965Sjdp  char *toend = tostart + tolen;
33965Sjdp  char *from;
33965Sjdp  char *fromend;
33965Sjdp  int fromlen;
33965Sjdp  register int ch, ch2 = 0;
33965Sjdp
33965Sjdp  /*State 0: beginning of normal line
33965Sjdp	  1: After first whitespace on line (flush more white)
33965Sjdp	  2: After first non-white (opcode) on line (keep 1white)
33965Sjdp	  3: after second white on line (into operands) (flush white)
33965Sjdp	  4: after putting out a .appline, put out digits
33965Sjdp	  5: parsing a string, then go to old-state
33965Sjdp	  6: putting out \ escape in a "d string.
33965Sjdp	  7: After putting out a .appfile, put out string.
33965Sjdp	  8: After putting out a .appfile string, flush until newline.
33965Sjdp	  9: After seeing symbol char in state 3 (keep 1white after symchar)
33965Sjdp	 10: After seeing whitespace in state 9 (keep white before symchar)
33965Sjdp	 11: After seeing a symbol character in state 0 (eg a label definition)
33965Sjdp	 -1: output string in out_string and go to the state in old_state
33965Sjdp	 -2: flush text until a '*' '/' is seen, then go to state old_state
33965Sjdp	  */
33965Sjdp
33965Sjdp  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
33965Sjdp     constructs like ``.loc 1 20''.  This was turning into ``.loc
33965Sjdp     120''.  States 9 and 10 ensure that a space is never dropped in
33965Sjdp     between characters which could appear in an identifier.  Ian
33965Sjdp     Taylor, ian@cygnus.com.
33965Sjdp
33965Sjdp     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
33965Sjdp     correctly on the PA (and any other target where colons are optional).
33965Sjdp     Jeff Law, law@cs.utah.edu.  */
33965Sjdp
33965Sjdp  /* This macro gets the next input character.  */
33965Sjdp
33965Sjdp#define GET()				\
33965Sjdp  (from < fromend			\
33965Sjdp   ? *from++				\
33965Sjdp   : ((saved_input != NULL		\
33965Sjdp       ? (free (saved_input),		\
33965Sjdp	  saved_input = NULL,		\
33965Sjdp	  0)				\
33965Sjdp       : 0),				\
33965Sjdp      fromlen = (*get) (&from),		\
33965Sjdp      fromend = from + fromlen,		\
33965Sjdp      (fromlen == 0			\
33965Sjdp       ? EOF				\
33965Sjdp       : *from++)))
33965Sjdp
33965Sjdp  /* This macro pushes a character back on the input stream.  */
33965Sjdp
33965Sjdp#define UNGET(uch) (*--from = (uch))
33965Sjdp
33965Sjdp  /* This macro puts a character into the output buffer.  If this
33965Sjdp     character fills the output buffer, this macro jumps to the label
33965Sjdp     TOFULL.  We use this rather ugly approach because we need to
33965Sjdp     handle two different termination conditions: EOF on the input
33965Sjdp     stream, and a full output buffer.  It would be simpler if we
33965Sjdp     always read in the entire input stream before processing it, but
33965Sjdp     I don't want to make such a significant change to the assembler's
33965Sjdp     memory usage.  */
33965Sjdp
33965Sjdp#define PUT(pch)			\
33965Sjdp  do					\
33965Sjdp    {					\
33965Sjdp      *to++ = (pch);			\
33965Sjdp      if (to >= toend)			\
33965Sjdp        goto tofull;			\
33965Sjdp    }					\
33965Sjdp  while (0)
33965Sjdp
33965Sjdp  if (saved_input != NULL)
33965Sjdp    {
33965Sjdp      from = saved_input;
33965Sjdp      fromend = from + saved_input_len;
33965Sjdp    }
33965Sjdp  else
33965Sjdp    {
33965Sjdp      fromlen = (*get) (&from);
33965Sjdp      if (fromlen == 0)
33965Sjdp	return 0;
33965Sjdp      fromend = from + fromlen;
33965Sjdp    }
33965Sjdp
33965Sjdp  while (1)
33965Sjdp    {
33965Sjdp      /* The cases in this switch end with continue, in order to
33965Sjdp         branch back to the top of this while loop and generate the
33965Sjdp         next output character in the appropriate state.  */
33965Sjdp      switch (state)
33965Sjdp	{
33965Sjdp	case -1:
33965Sjdp	  ch = *out_string++;
33965Sjdp	  if (*out_string == '\0')
33965Sjdp	    {
33965Sjdp	      state = old_state;
33965Sjdp	      old_state = 3;
33965Sjdp	    }
33965Sjdp	  PUT (ch);
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case -2:
33965Sjdp	  for (;;)
33965Sjdp	    {
33965Sjdp	      do
33965Sjdp		{
33965Sjdp		  ch = GET ();
33965Sjdp
33965Sjdp		  if (ch == EOF)
33965Sjdp		    {
33965Sjdp		      as_warn ("end of file in comment");
33965Sjdp		      goto fromeof;
33965Sjdp		    }
33965Sjdp
33965Sjdp		  if (ch == '\n')
33965Sjdp		    PUT ('\n');
33965Sjdp		}
33965Sjdp	      while (ch != '*');
33965Sjdp
33965Sjdp	      while ((ch = GET ()) == '*')
33965Sjdp		;
33965Sjdp
33965Sjdp	      if (ch == EOF)
33965Sjdp		{
33965Sjdp		  as_warn ("end of file in comment");
33965Sjdp		  goto fromeof;
33965Sjdp		}
33965Sjdp
33965Sjdp	      if (ch == '/')
33965Sjdp		break;
33965Sjdp
33965Sjdp	      UNGET (ch);
33965Sjdp	    }
33965Sjdp
33965Sjdp	  state = old_state;
33965Sjdp	  UNGET (' ');
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 4:
33965Sjdp	  ch = GET ();
33965Sjdp	  if (ch == EOF)
33965Sjdp	    goto fromeof;
33965Sjdp	  else if (ch >= '0' && ch <= '9')
33965Sjdp	    PUT (ch);
33965Sjdp	  else
33965Sjdp	    {
33965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch))
33965Sjdp		ch = GET ();
33965Sjdp	      if (ch == '"')
33965Sjdp		{
33965Sjdp		  UNGET (ch);
33965Sjdp		  if (scrub_m68k_mri)
33965Sjdp		    out_string = "\n\tappfile ";
33965Sjdp		  else
33965Sjdp		    out_string = "\n\t.appfile ";
33965Sjdp		  old_state = 7;
33965Sjdp		  state = -1;
33965Sjdp		  PUT (*out_string++);
33965Sjdp		}
33965Sjdp	      else
33965Sjdp		{
33965Sjdp		  while (ch != EOF && ch != '\n')
33965Sjdp		    ch = GET ();
33965Sjdp		  state = 0;
33965Sjdp		  PUT (ch);
33965Sjdp		}
33965Sjdp	    }
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 5:
33965Sjdp	  /* We are going to copy everything up to a quote character,
33965Sjdp             with special handling for a backslash.  We try to
33965Sjdp             optimize the copying in the simple case without using the
33965Sjdp             GET and PUT macros.  */
33965Sjdp	  {
33965Sjdp	    char *s;
33965Sjdp	    int len;
33965Sjdp
33965Sjdp	    for (s = from; s < fromend; s++)
33965Sjdp	      {
33965Sjdp		ch = *s;
33965Sjdp		/* This condition must be changed if the type of any
33965Sjdp                   other character can be LEX_IS_STRINGQUOTE.  */
33965Sjdp		if (ch == '\\'
33965Sjdp		    || ch == '"'
33965Sjdp		    || ch == '\''
33965Sjdp		    || ch == '\n')
33965Sjdp		  break;
33965Sjdp	      }
33965Sjdp	    len = s - from;
33965Sjdp	    if (len > toend - to)
33965Sjdp	      len = toend - to;
33965Sjdp	    if (len > 0)
33965Sjdp	      {
33965Sjdp		memcpy (to, from, len);
33965Sjdp		to += len;
33965Sjdp		from += len;
33965Sjdp	      }
33965Sjdp	  }
33965Sjdp
33965Sjdp	  ch = GET ();
33965Sjdp	  if (ch == EOF)
33965Sjdp	    {
33965Sjdp	      as_warn ("end of file in string: inserted '\"'");
33965Sjdp	      state = old_state;
33965Sjdp	      UNGET ('\n');
33965Sjdp	      PUT ('"');
33965Sjdp	    }
33965Sjdp	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
33965Sjdp	    {
33965Sjdp	      state = old_state;
33965Sjdp	      PUT (ch);
33965Sjdp	    }
33965Sjdp#ifndef NO_STRING_ESCAPES
33965Sjdp	  else if (ch == '\\')
33965Sjdp	    {
33965Sjdp	      state = 6;
33965Sjdp	      PUT (ch);
33965Sjdp	    }
33965Sjdp#endif
33965Sjdp	  else if (scrub_m68k_mri && ch == '\n')
33965Sjdp	    {
33965Sjdp	      /* Just quietly terminate the string.  This permits lines like
33965Sjdp		   bne	label	loop if we haven't reach end yet
33965Sjdp		 */
33965Sjdp	      state = old_state;
33965Sjdp	      UNGET (ch);
33965Sjdp	      PUT ('\'');
33965Sjdp	    }
33965Sjdp	  else
33965Sjdp	    {
33965Sjdp	      PUT (ch);
33965Sjdp	    }
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 6:
33965Sjdp	  state = 5;
33965Sjdp	  ch = GET ();
33965Sjdp	  switch (ch)
33965Sjdp	    {
33965Sjdp	      /* Handle strings broken across lines, by turning '\n' into
33965Sjdp		 '\\' and 'n'.  */
33965Sjdp	    case '\n':
33965Sjdp	      UNGET ('n');
33965Sjdp	      add_newlines++;
33965Sjdp	      PUT ('\\');
33965Sjdp	      continue;
33965Sjdp
33965Sjdp	    case '"':
33965Sjdp	    case '\\':
33965Sjdp	    case 'b':
33965Sjdp	    case 'f':
33965Sjdp	    case 'n':
33965Sjdp	    case 'r':
33965Sjdp	    case 't':
33965Sjdp	    case 'v':
33965Sjdp	    case 'x':
33965Sjdp	    case 'X':
33965Sjdp	    case '0':
33965Sjdp	    case '1':
33965Sjdp	    case '2':
33965Sjdp	    case '3':
33965Sjdp	    case '4':
33965Sjdp	    case '5':
33965Sjdp	    case '6':
33965Sjdp	    case '7':
33965Sjdp	      break;
33965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
33965Sjdp	    default:
33965Sjdp	      as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
33965Sjdp	      break;
33965Sjdp#else  /* ONLY_STANDARD_ESCAPES */
33965Sjdp	    default:
33965Sjdp	      /* Accept \x as x for any x */
33965Sjdp	      break;
33965Sjdp#endif /* ONLY_STANDARD_ESCAPES */
33965Sjdp
33965Sjdp	    case EOF:
33965Sjdp	      as_warn ("End of file in string: '\"' inserted");
33965Sjdp	      PUT ('"');
33965Sjdp	      continue;
33965Sjdp	    }
33965Sjdp	  PUT (ch);
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 7:
33965Sjdp	  ch = GET ();
33965Sjdp	  state = 5;
33965Sjdp	  old_state = 8;
33965Sjdp	  if (ch == EOF)
33965Sjdp	    goto fromeof;
33965Sjdp	  PUT (ch);
33965Sjdp	  continue;
33965Sjdp
33965Sjdp	case 8:
33965Sjdp	  do
33965Sjdp	    ch = GET ();
33965Sjdp	  while (ch != '\n' && ch != EOF);
33965Sjdp	  if (ch == EOF)
33965Sjdp	    goto fromeof;
33965Sjdp	  state = 0;
33965Sjdp	  PUT (ch);
33965Sjdp	  continue;
33965Sjdp	}
33965Sjdp
33965Sjdp      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
33965Sjdp
33965Sjdp      /* flushchar: */
33965Sjdp      ch = GET ();
33965Sjdp
33965Sjdp    recycle:
33965Sjdp
33965Sjdp#ifdef TC_M68K
33965Sjdp      /* We want to have pseudo-ops which control whether we are in
33965Sjdp         MRI mode or not.  Unfortunately, since m68k MRI mode affects
33965Sjdp         the scrubber, that means that we need a special purpose
33965Sjdp         recognizer here.  */
33965Sjdp      if (mri_state == NULL)
33965Sjdp	{
33965Sjdp	  if ((state == 0 || state == 1)
33965Sjdp	      && ch == mri_pseudo[0])
33965Sjdp	    mri_state = mri_pseudo + 1;
33965Sjdp	}
33965Sjdp      else
33965Sjdp	{
33965Sjdp	  /* We advance to the next state if we find the right
33965Sjdp	     character, or if we need a space character and we get any
33965Sjdp	     whitespace character, or if we need a '0' and we get a
33965Sjdp	     '1' (this is so that we only need one state to handle
33965Sjdp	     ``.mri 0'' and ``.mri 1'').  */
33965Sjdp	  if (ch != '\0'
33965Sjdp	      && (*mri_state == ch
33965Sjdp		  || (*mri_state == ' '
33965Sjdp		      && lex[ch] == LEX_IS_WHITESPACE)
33965Sjdp		  || (*mri_state == '0'
33965Sjdp		      && ch == '1')))
33965Sjdp	    {
33965Sjdp	      mri_last_ch = ch;
33965Sjdp	      ++mri_state;
33965Sjdp	    }
33965Sjdp	  else if (*mri_state != '\0'
33965Sjdp		   || (lex[ch] != LEX_IS_WHITESPACE
33965Sjdp		       && lex[ch] != LEX_IS_NEWLINE))
33965Sjdp	    {
33965Sjdp	      /* We did not get the expected character, or we didn't
33965Sjdp		 get a valid terminating character after seeing the
33965Sjdp		 entire pseudo-op, so we must go back to the
33965Sjdp		 beginning.  */
33965Sjdp	      mri_state = NULL;
33965Sjdp	    }
33965Sjdp	  else
33965Sjdp	    {
33965Sjdp	      /* We've read the entire pseudo-op.  mri_last_ch is
33965Sjdp                 either '0' or '1', indicating whether to leave or
33965Sjdp                 enter MRI mode respectively.  */
33965Sjdp	      do_scrub_begin (mri_last_ch == '1');
33965Sjdp
33965Sjdp	      /* We continue handling the character as usual.  The
33965Sjdp                 main gas reader must also handle the .mri pseudo-op
33965Sjdp                 to control expression parsing and the like.  */
33965Sjdp	    }
33965Sjdp	}
33965Sjdp#endif
33965Sjdp
33965Sjdp      if (ch == EOF)
33965Sjdp	{
33965Sjdp	  if (state != 0)
33965Sjdp	    {
33965Sjdp	      as_warn ("end of file not at end of a line; newline inserted");
33965Sjdp	      state = 0;
33965Sjdp	      PUT ('\n');
33965Sjdp	    }
33965Sjdp	  goto fromeof;
33965Sjdp	}
33965Sjdp
33965Sjdp      switch (lex[ch])
33965Sjdp	{
33965Sjdp	case LEX_IS_WHITESPACE:
33965Sjdp	  do
33965Sjdp	    {
33965Sjdp	      ch = GET ();
33965Sjdp	    }
33965Sjdp	  while (ch != EOF && IS_WHITESPACE (ch));
33965Sjdp	  if (ch == EOF)
33965Sjdp	    goto fromeof;
33965Sjdp
33965Sjdp	  if (state == 0)
33965Sjdp	    {
33965Sjdp	      /* Preserve a single whitespace character at the
33965Sjdp		 beginning of a line.  */
33965Sjdp	      state = 1;
33965Sjdp	      UNGET (ch);
33965Sjdp	      PUT (' ');
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp
33965Sjdp	  if (IS_COMMENT (ch)
33965Sjdp	      || ch == '/'
33965Sjdp	      || IS_LINE_SEPARATOR (ch))
33965Sjdp	    {
33965Sjdp	      if (scrub_m68k_mri)
33965Sjdp		{
33965Sjdp		  /* In MRI mode, we keep these spaces.  */
33965Sjdp		  UNGET (ch);
33965Sjdp		  PUT (' ');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      goto recycle;
33965Sjdp	    }
33965Sjdp
33965Sjdp	  /* If we're in state 2 or 11, we've seen a non-white
33965Sjdp	     character followed by whitespace.  If the next character
33965Sjdp	     is ':', this is whitespace after a label name which we
33965Sjdp	     normally must ignore.  In MRI mode, though, spaces are
33965Sjdp	     not permitted between the label and the colon.  */
33965Sjdp	  if ((state == 2 || state == 11)
33965Sjdp	      && lex[ch] == LEX_IS_COLON
33965Sjdp	      && ! scrub_m68k_mri)
33965Sjdp	    {
33965Sjdp	      state = 1;
33965Sjdp	      PUT (ch);
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp
33965Sjdp	  switch (state)
33965Sjdp	    {
33965Sjdp	    case 0:
33965Sjdp	      state++;
33965Sjdp	      goto recycle;	/* Punted leading sp */
33965Sjdp	    case 1:
33965Sjdp	      /* We can arrive here if we leave a leading whitespace
33965Sjdp		 character at the beginning of a line.  */
33965Sjdp	      goto recycle;
33965Sjdp	    case 2:
33965Sjdp	      state = 3;
33965Sjdp	      if (to + 1 < toend)
33965Sjdp		{
33965Sjdp		  /* Optimize common case by skipping UNGET/GET.  */
33965Sjdp		  PUT (' ');	/* Sp after opco */
33965Sjdp		  goto recycle;
33965Sjdp		}
33965Sjdp	      UNGET (ch);
33965Sjdp	      PUT (' ');
33965Sjdp	      break;
33965Sjdp	    case 3:
33965Sjdp	      if (scrub_m68k_mri)
33965Sjdp		{
33965Sjdp		  /* In MRI mode, we keep these spaces.  */
33965Sjdp		  UNGET (ch);
33965Sjdp		  PUT (' ');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      goto recycle;	/* Sp in operands */
33965Sjdp	    case 9:
33965Sjdp	    case 10:
33965Sjdp	      if (scrub_m68k_mri)
33965Sjdp		{
33965Sjdp		  /* In MRI mode, we keep these spaces.  */
33965Sjdp		  state = 3;
33965Sjdp		  UNGET (ch);
33965Sjdp		  PUT (' ');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      state = 10;	/* Sp after symbol char */
33965Sjdp	      goto recycle;
33965Sjdp	    case 11:
33965Sjdp	      if (flag_m68k_mri
33965Sjdp#ifdef LABELS_WITHOUT_COLONS
33965Sjdp		  || 1
33965Sjdp#endif
33965Sjdp		  )
33965Sjdp		state = 1;
33965Sjdp	      else
33965Sjdp		{
33965Sjdp		  /* We know that ch is not ':', since we tested that
33965Sjdp                     case above.  Therefore this is not a label, so it
33965Sjdp                     must be the opcode, and we've just seen the
33965Sjdp                     whitespace after it.  */
33965Sjdp		  state = 3;
33965Sjdp		}
33965Sjdp	      UNGET (ch);
33965Sjdp	      PUT (' ');	/* Sp after label definition.  */
33965Sjdp	      break;
33965Sjdp	    default:
33965Sjdp	      BAD_CASE (state);
33965Sjdp	    }
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_TWOCHAR_COMMENT_1ST:
33965Sjdp	  ch2 = GET ();
33965Sjdp	  if (ch2 == '*')
33965Sjdp	    {
33965Sjdp	      for (;;)
33965Sjdp		{
33965Sjdp		  do
33965Sjdp		    {
33965Sjdp		      ch2 = GET ();
33965Sjdp		      if (ch2 != EOF && IS_NEWLINE (ch2))
33965Sjdp			add_newlines++;
33965Sjdp		    }
33965Sjdp		  while (ch2 != EOF && ch2 != '*');
33965Sjdp
33965Sjdp		  while (ch2 == '*')
33965Sjdp		    ch2 = GET ();
33965Sjdp
33965Sjdp		  if (ch2 == EOF || ch2 == '/')
33965Sjdp		    break;
33965Sjdp
33965Sjdp		  /* This UNGET will ensure that we count newlines
33965Sjdp                     correctly.  */
33965Sjdp		  UNGET (ch2);
33965Sjdp		}
33965Sjdp
33965Sjdp	      if (ch2 == EOF)
33965Sjdp		as_warn ("end of file in multiline comment");
33965Sjdp
33965Sjdp	      ch = ' ';
33965Sjdp	      goto recycle;
33965Sjdp	    }
33965Sjdp	  else
33965Sjdp	    {
33965Sjdp	      if (ch2 != EOF)
33965Sjdp		UNGET (ch2);
33965Sjdp	      if (state == 9 || state == 10)
33965Sjdp		state = 3;
33965Sjdp	      PUT (ch);
33965Sjdp	    }
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_STRINGQUOTE:
33965Sjdp	  if (state == 10)
33965Sjdp	    {
33965Sjdp	      /* Preserve the whitespace in foo "bar" */
33965Sjdp	      UNGET (ch);
33965Sjdp	      state = 3;
33965Sjdp	      PUT (' ');
33965Sjdp
33965Sjdp	      /* PUT didn't jump out.  We could just break, but we
33965Sjdp                 know what will happen, so optimize a bit.  */
33965Sjdp	      ch = GET ();
33965Sjdp	      old_state = 3;
33965Sjdp	    }
33965Sjdp	  else if (state == 9)
33965Sjdp	    old_state = 3;
33965Sjdp	  else
33965Sjdp	    old_state = state;
33965Sjdp	  state = 5;
33965Sjdp	  PUT (ch);
33965Sjdp	  break;
33965Sjdp
33965Sjdp#ifndef IEEE_STYLE
33965Sjdp	case LEX_IS_ONECHAR_QUOTE:
33965Sjdp	  if (state == 10)
33965Sjdp	    {
33965Sjdp	      /* Preserve the whitespace in foo 'b' */
33965Sjdp	      UNGET (ch);
33965Sjdp	      state = 3;
33965Sjdp	      PUT (' ');
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp	  ch = GET ();
33965Sjdp	  if (ch == EOF)
33965Sjdp	    {
33965Sjdp	      as_warn ("end of file after a one-character quote; \\0 inserted");
33965Sjdp	      ch = 0;
33965Sjdp	    }
33965Sjdp	  if (ch == '\\')
33965Sjdp	    {
33965Sjdp	      ch = GET ();
33965Sjdp	      if (ch == EOF)
33965Sjdp		{
33965Sjdp		  as_warn ("end of file in escape character");
33965Sjdp		  ch = '\\';
33965Sjdp		}
33965Sjdp	      else
33965Sjdp		ch = process_escape (ch);
33965Sjdp	    }
33965Sjdp	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
33965Sjdp
33965Sjdp	  /* None of these 'x constants for us.  We want 'x'.  */
33965Sjdp	  if ((ch = GET ()) != '\'')
33965Sjdp	    {
33965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE
33965Sjdp	      as_warn ("Missing close quote: (assumed)");
33965Sjdp#else
33965Sjdp	      if (ch != EOF)
33965Sjdp		UNGET (ch);
33965Sjdp#endif
33965Sjdp	    }
33965Sjdp	  if (strlen (out_buf) == 1)
33965Sjdp	    {
33965Sjdp	      PUT (out_buf[0]);
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp	  if (state == 9)
33965Sjdp	    old_state = 3;
33965Sjdp	  else
33965Sjdp	    old_state = state;
33965Sjdp	  state = -1;
33965Sjdp	  out_string = out_buf;
33965Sjdp	  PUT (*out_string++);
33965Sjdp	  break;
33965Sjdp#endif
33965Sjdp
33965Sjdp	case LEX_IS_COLON:
33965Sjdp	  if (state == 9 || state == 10)
33965Sjdp	    state = 3;
33965Sjdp	  else if (state != 3)
33965Sjdp	    state = 1;
33965Sjdp	  PUT (ch);
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_NEWLINE:
33965Sjdp	  /* Roll out a bunch of newlines from inside comments, etc.  */
33965Sjdp	  if (add_newlines)
33965Sjdp	    {
33965Sjdp	      --add_newlines;
33965Sjdp	      UNGET (ch);
33965Sjdp	    }
33965Sjdp	  /* fall thru into... */
33965Sjdp
33965Sjdp	case LEX_IS_LINE_SEPARATOR:
33965Sjdp	  state = 0;
33965Sjdp	  PUT (ch);
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_LINE_COMMENT_START:
33965Sjdp	  /* FIXME-someday: The two character comment stuff was badly
33965Sjdp	     thought out.  On i386, we want '/' as line comment start
33965Sjdp	     AND we want C style comments.  hence this hack.  The
33965Sjdp	     whole lexical process should be reworked.  xoxorich.  */
33965Sjdp	  if (ch == '/')
33965Sjdp	    {
33965Sjdp	      ch2 = GET ();
33965Sjdp	      if (ch2 == '*')
33965Sjdp		{
33965Sjdp		  old_state = 3;
33965Sjdp		  state = -2;
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      else
33965Sjdp		{
33965Sjdp		  UNGET (ch2);
33965Sjdp		}
33965Sjdp	    } /* bad hack */
33965Sjdp
33965Sjdp	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
33965Sjdp	    {
33965Sjdp	      int startch;
33965Sjdp
33965Sjdp	      startch = ch;
33965Sjdp
33965Sjdp	      do
33965Sjdp		{
33965Sjdp		  ch = GET ();
33965Sjdp		}
33965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch));
33965Sjdp	      if (ch == EOF)
33965Sjdp		{
33965Sjdp		  as_warn ("end of file in comment; newline inserted");
33965Sjdp		  PUT ('\n');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
33965Sjdp		{
33965Sjdp		  /* Not a cpp line.  */
33965Sjdp		  while (ch != EOF && !IS_NEWLINE (ch))
33965Sjdp		    ch = GET ();
33965Sjdp		  if (ch == EOF)
33965Sjdp		    as_warn ("EOF in Comment: Newline inserted");
33965Sjdp		  state = 0;
33965Sjdp		  PUT ('\n');
33965Sjdp		  break;
33965Sjdp		}
33965Sjdp	      /* Looks like `# 123 "filename"' from cpp.  */
33965Sjdp	      UNGET (ch);
33965Sjdp	      old_state = 4;
33965Sjdp	      state = -1;
33965Sjdp	      if (scrub_m68k_mri)
33965Sjdp		out_string = "\tappline ";
33965Sjdp	      else
33965Sjdp		out_string = "\t.appline ";
33965Sjdp	      PUT (*out_string++);
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp
33965Sjdp	  /* We have a line comment character which is not at the
33965Sjdp	     start of a line.  If this is also a normal comment
33965Sjdp	     character, fall through.  Otherwise treat it as a default
33965Sjdp	     character.  */
33965Sjdp	  if (strchr (tc_comment_chars, ch) == NULL
33965Sjdp	      && (! scrub_m68k_mri
33965Sjdp		  || (ch != '!' && ch != '*')))
33965Sjdp	    goto de_fault;
33965Sjdp	  if (scrub_m68k_mri
33965Sjdp	      && (ch == '!' || ch == '*' || ch == '#')
33965Sjdp	      && state != 1
33965Sjdp	      && state != 10)
33965Sjdp	    goto de_fault;
33965Sjdp	  /* Fall through.  */
33965Sjdp	case LEX_IS_COMMENT_START:
33965Sjdp	  do
33965Sjdp	    {
33965Sjdp	      ch = GET ();
33965Sjdp	    }
33965Sjdp	  while (ch != EOF && !IS_NEWLINE (ch));
33965Sjdp	  if (ch == EOF)
33965Sjdp	    as_warn ("end of file in comment; newline inserted");
33965Sjdp	  state = 0;
33965Sjdp	  PUT ('\n');
33965Sjdp	  break;
33965Sjdp
33965Sjdp	case LEX_IS_SYMBOL_COMPONENT:
33965Sjdp	  if (state == 10)
33965Sjdp	    {
33965Sjdp	      /* This is a symbol character following another symbol
33965Sjdp		 character, with whitespace in between.  We skipped
33965Sjdp		 the whitespace earlier, so output it now.  */
33965Sjdp	      UNGET (ch);
33965Sjdp	      state = 3;
33965Sjdp	      PUT (' ');
33965Sjdp	      break;
33965Sjdp	    }
33965Sjdp
33965Sjdp	  if (state == 3)
33965Sjdp	    state = 9;
33965Sjdp
33965Sjdp	  /* This is a common case.  Quickly copy CH and all the
33965Sjdp             following symbol component or normal characters.  */
33965Sjdp	  if (to + 1 < toend && mri_state == NULL)
33965Sjdp	    {
33965Sjdp	      char *s;
33965Sjdp	      int len;
33965Sjdp
33965Sjdp	      for (s = from; s < fromend; s++)
33965Sjdp		{
33965Sjdp		  int type;
33965Sjdp
33965Sjdp		  ch2 = *s;
33965Sjdp		  type = lex[ch2];
33965Sjdp		  if (type != 0
33965Sjdp		      && type != LEX_IS_SYMBOL_COMPONENT)
33965Sjdp		    break;
33965Sjdp		}
33965Sjdp	      if (s > from)
33965Sjdp		{
33965Sjdp		  /* Handle the last character normally, for
33965Sjdp                     simplicity.  */
33965Sjdp		  --s;
33965Sjdp		}
33965Sjdp	      len = s - from;
33965Sjdp	      if (len > (toend - to) - 1)
33965Sjdp		len = (toend - to) - 1;
33965Sjdp	      if (len > 0)
33965Sjdp		{
33965Sjdp		  PUT (ch);
33965Sjdp		  if (len > 8)
33965Sjdp		    {
33965Sjdp		      memcpy (to, from, len);
33965Sjdp		      to += len;
33965Sjdp		      from += len;
33965Sjdp		    }
33965Sjdp		  else
33965Sjdp		    {
33965Sjdp		      switch (len)
33965Sjdp			{
33965Sjdp			case 8: *to++ = *from++;
33965Sjdp			case 7: *to++ = *from++;
33965Sjdp			case 6: *to++ = *from++;
33965Sjdp			case 5: *to++ = *from++;
33965Sjdp			case 4: *to++ = *from++;
33965Sjdp			case 3: *to++ = *from++;
33965Sjdp			case 2: *to++ = *from++;
33965Sjdp			case 1: *to++ = *from++;
33965Sjdp			}
33965Sjdp		    }
33965Sjdp		  ch = GET ();
33965Sjdp		}
33965Sjdp	    }
33965Sjdp
33965Sjdp	  /* Fall through.  */
33965Sjdp	default:
33965Sjdp	de_fault:
33965Sjdp	  /* Some relatively `normal' character.  */
33965Sjdp	  if (state == 0)
33965Sjdp	    {
33965Sjdp	      state = 11;	/* Now seeing label definition */
33965Sjdp	    }
33965Sjdp	  else if (state == 1)
33965Sjdp	    {
33965Sjdp	      state = 2;	/* Ditto */
33965Sjdp	    }
33965Sjdp	  else if (state == 9)
33965Sjdp	    {
33965Sjdp	      if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
33965Sjdp		state = 3;
33965Sjdp	    }
33965Sjdp	  else if (state == 10)
33965Sjdp	    {
33965Sjdp	      state = 3;
33965Sjdp	    }
33965Sjdp	  PUT (ch);
33965Sjdp	  break;
33965Sjdp	}
33965Sjdp    }
33965Sjdp
33965Sjdp  /*NOTREACHED*/
33965Sjdp
33965Sjdp fromeof:
33965Sjdp  /* We have reached the end of the input.  */
33965Sjdp  return to - tostart;
33965Sjdp
33965Sjdp tofull:
33965Sjdp  /* The output buffer is full.  Save any input we have not yet
33965Sjdp     processed.  */
33965Sjdp  if (fromend > from)
33965Sjdp    {
33965Sjdp      char *save;
33965Sjdp
33965Sjdp      save = (char *) xmalloc (fromend - from);
33965Sjdp      memcpy (save, from, fromend - from);
33965Sjdp      if (saved_input != NULL)
33965Sjdp	free (saved_input);
33965Sjdp      saved_input = save;
33965Sjdp      saved_input_len = fromend - from;
33965Sjdp    }
33965Sjdp  else
33965Sjdp    {
33965Sjdp      if (saved_input != NULL)
33965Sjdp	{
33965Sjdp	  free (saved_input);
33965Sjdp	  saved_input = NULL;
33965Sjdp	}
33965Sjdp    }
33965Sjdp  return to - tostart;
33965Sjdp}
33965Sjdp
33965Sjdp/* end of app.c */