binutils/gas/app.c

187938Semax/* This is the Assembler Pre-Processor
187938Semax   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
187938Semax   1999, 2000
187938Semax   Free Software Foundation, Inc.
187938Semax
187938Semax   This file is part of GAS, the GNU Assembler.
187938Semax
187938Semax   GAS is free software; you can redistribute it and/or modify
187938Semax   it under the terms of the GNU General Public License as published by
187938Semax   the Free Software Foundation; either version 2, or (at your option)
187938Semax   any later version.
187938Semax
187938Semax   GAS is distributed in the hope that it will be useful,
187938Semax   but WITHOUT ANY WARRANTY; without even the implied warranty of
187938Semax   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
187938Semax   GNU General Public License for more details.
187938Semax
187938Semax   You should have received a copy of the GNU General Public License
187938Semax   along with GAS; see the file COPYING.  If not, write to the Free
187938Semax   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
187938Semax   02111-1307, USA.  */
187938Semax
187938Semax/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
187938Semax/* App, the assembler pre-processor.  This pre-processor strips out excess
187938Semax   spaces, turns single-quoted characters into a decimal constant, and turns
187938Semax   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
187938Semax   pair.  This needs better error-handling.  */
187938Semax
187938Semax#include <stdio.h>
187938Semax#include "as.h"			/* For BAD_CASE() only */
187938Semax
187938Semax#if (__STDC__ != 1)
187938Semax#ifndef const
187938Semax#define const  /* empty */
187938Semax#endif
187938Semax#endif
187938Semax
187938Semax#ifdef TC_M68K
187938Semax/* Whether we are scrubbing in m68k MRI mode.  This is different from
187938Semax   flag_m68k_mri, because the two flags will be affected by the .mri
187938Semax   pseudo-op at different times.  */
187938Semaxstatic int scrub_m68k_mri;
187938Semax#else
187938Semax#define scrub_m68k_mri 0
187938Semax#endif
187938Semax
187938Semax/* The pseudo-op which switches in and out of MRI mode.  See the
187938Semax   comment in do_scrub_chars.  */
187938Semaxstatic const char mri_pseudo[] = ".mri 0";
187938Semax
187938Semax#if defined TC_ARM && defined OBJ_ELF
187938Semax/* The pseudo-op for which we need to special-case `@' characters.
187938Semax   See the comment in do_scrub_chars.  */
187938Semaxstatic const char   symver_pseudo[] = ".symver";
187938Semaxstatic const char * symver_state;
187938Semax#endif
187938Semax
187938Semaxstatic char lex[256];
187938Semaxstatic const char symbol_chars[] =
187938Semax"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
187938Semax
187938Semax#define LEX_IS_SYMBOL_COMPONENT		1
187938Semax#define LEX_IS_WHITESPACE		2
187938Semax#define LEX_IS_LINE_SEPARATOR		3
187938Semax#define LEX_IS_COMMENT_START		4
187938Semax#define LEX_IS_LINE_COMMENT_START	5
187938Semax#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
187938Semax#define	LEX_IS_STRINGQUOTE		8
187938Semax#define	LEX_IS_COLON			9
187938Semax#define	LEX_IS_NEWLINE			10
187938Semax#define	LEX_IS_ONECHAR_QUOTE		11
187938Semax#ifdef TC_V850
187938Semax#define LEX_IS_DOUBLEDASH_1ST		12
187938Semax#endif
187938Semax#ifdef TC_M32R
187938Semax#define DOUBLEBAR_PARALLEL
187938Semax#endif
187938Semax#ifdef DOUBLEBAR_PARALLEL
187938Semax#define LEX_IS_DOUBLEBAR_1ST		13
187938Semax#endif
187938Semax#define LEX_IS_PARALLEL_SEPARATOR	14
187938Semax#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
187938Semax#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
187938Semax#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
187938Semax#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
187938Semax#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
187938Semax#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
187938Semax#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
187938Semax
187938Semaxstatic int process_escape PARAMS ((int));
187938Semax
187938Semax/* FIXME-soon: The entire lexer/parser thingy should be
187938Semax   built statically at compile time rather than dynamically
187938Semax   each and every time the assembler is run.  xoxorich.  */
187938Semax
187938Semaxvoid
187938Semaxdo_scrub_begin (m68k_mri)
187938Semax     int m68k_mri ATTRIBUTE_UNUSED;
187938Semax{
187938Semax  const char *p;
187938Semax  int c;
187938Semax
187938Semax  lex[' '] = LEX_IS_WHITESPACE;
187938Semax  lex['\t'] = LEX_IS_WHITESPACE;
187938Semax  lex['\r'] = LEX_IS_WHITESPACE;
187938Semax  lex['\n'] = LEX_IS_NEWLINE;
187938Semax  lex[':'] = LEX_IS_COLON;
187938Semax
187938Semax#ifdef TC_M68K
187938Semax  scrub_m68k_mri = m68k_mri;
187938Semax
187938Semax  if (! m68k_mri)
187938Semax#endif
187938Semax    {
187938Semax      lex['"'] = LEX_IS_STRINGQUOTE;
187938Semax
187938Semax#if ! defined (TC_HPPA) && ! defined (TC_I370)
187938Semax      /* I370 uses single-quotes to delimit integer, float constants */
187938Semax      lex['\''] = LEX_IS_ONECHAR_QUOTE;
187938Semax#endif
187938Semax
187938Semax#ifdef SINGLE_QUOTE_STRINGS
187938Semax      lex['\''] = LEX_IS_STRINGQUOTE;
187938Semax#endif
187938Semax    }
187938Semax
187938Semax  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
187938Semax     in state 5 of do_scrub_chars must be changed.  */
187938Semax
187938Semax  /* Note that these override the previous defaults, e.g. if ';' is a
187938Semax     comment char, then it isn't a line separator.  */
187938Semax  for (p = symbol_chars; *p; ++p)
187938Semax    {
187938Semax      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
187938Semax    }				/* declare symbol characters */
187938Semax
187938Semax  for (c = 128; c < 256; ++c)
187938Semax    lex[c] = LEX_IS_SYMBOL_COMPONENT;
187938Semax
187938Semax#ifdef tc_symbol_chars
187938Semax  /* This macro permits the processor to specify all characters which
187938Semax     may appears in an operand.  This will prevent the scrubber from
187938Semax     discarding meaningful whitespace in certain cases.  The i386
187938Semax     backend uses this to support prefixes, which can confuse the
187938Semax     scrubber as to whether it is parsing operands or opcodes.  */
187938Semax  for (p = tc_symbol_chars; *p; ++p)
187938Semax    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
187938Semax#endif
187938Semax
187938Semax  /* The m68k backend wants to be able to change comment_chars.  */
187938Semax#ifndef tc_comment_chars
187938Semax#define tc_comment_chars comment_chars
187938Semax#endif
187938Semax  for (p = tc_comment_chars; *p; p++)
187938Semax    {
187938Semax      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
187938Semax    }				/* declare comment chars */
187938Semax
187938Semax  for (p = line_comment_chars; *p; p++)
187938Semax    {
187938Semax      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
187938Semax    }				/* declare line comment chars */
187938Semax
187938Semax  for (p = line_separator_chars; *p; p++)
187938Semax    {
187938Semax      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
187938Semax    }				/* declare line separators */
187938Semax
187938Semax#ifdef tc_parallel_separator_chars
187938Semax  /* This macro permits the processor to specify all characters which
187938Semax     separate parallel insns on the same line.  */
187938Semax  for (p = tc_parallel_separator_chars; *p; p++)
187938Semax    {
187938Semax      lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
187938Semax    }				/* declare parallel separators */
187938Semax#endif
187938Semax
187938Semax  /* Only allow slash-star comments if slash is not in use.
187938Semax     FIXME: This isn't right.  We should always permit them.  */
187938Semax  if (lex['/'] == 0)
187938Semax    {
187938Semax      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
187938Semax    }
187938Semax
187938Semax#ifdef TC_M68K
187938Semax  if (m68k_mri)
187938Semax    {
187938Semax      lex['\''] = LEX_IS_STRINGQUOTE;
187938Semax      lex[';'] = LEX_IS_COMMENT_START;
187938Semax      lex['*'] = LEX_IS_LINE_COMMENT_START;
187938Semax      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
187938Semax         then it can't be used in an expression.  */
187938Semax      lex['!'] = LEX_IS_LINE_COMMENT_START;
187938Semax    }
187938Semax#endif
187938Semax
187938Semax#ifdef TC_V850
187938Semax  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
187938Semax#endif
187938Semax#ifdef DOUBLEBAR_PARALLEL
187938Semax  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
187938Semax#endif
187938Semax#ifdef TC_D30V
187938Semax  /* must do this is we want VLIW instruction with "->" or "<-" */
187938Semax  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
187938Semax#endif
187938Semax}				/* do_scrub_begin() */
187938Semax
/* Scrubber state that has to survive from one do_scrub_chars call to
   the next.  */

/* State the scrubber's state machine is currently in.  */
static int state;
/* State to go back to once a temporary state has finished.  */
static int old_state;
/* Next character of the string being copied out in state -1.  */
static char *out_string;
/* Holds the decimal text generated for a one-character quote.  */
static char out_buf[20];
/* Count of newlines seen inside multi-line comments and escaped line
   breaks in strings.  */
static int add_newlines;
/* Input carried over from an earlier call, and its length; NULL when
   there is none.  */
static char *saved_input;
static int saved_input_len;
/* Buffer handed to the GET callback to be filled with raw input.  */
static char input_buffer[32 * 1024];
/* Progress of the ".mri" recognizer (a pointer into mri_pseudo), and
   the last character it matched.  */
static const char *mri_state;
static char mri_last_ch;

/* Snapshot of all of the above, taken by app_push and put back by
   app_pop, so that app's state can be preserved across #include's.
   App runs asynchronously to the parsing of the .include's, so its
   state at the moment a .include is interpreted bears no relation to
   the directive itself; hence everything has to be saved.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *saved_input;
  int saved_input_len;
#ifdef TC_M68K
  int scrub_m68k_mri;
#endif
  const char *mri_state;
  char mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char *symver_state;
#endif
};
187938Semax
187938Semaxchar *
187938Semaxapp_push ()
187938Semax{
187938Semax  register struct app_save *saved;
187938Semax
187938Semax  saved = (struct app_save *) xmalloc (sizeof (*saved));
187938Semax  saved->state = state;
187938Semax  saved->old_state = old_state;
187938Semax  saved->out_string = out_string;
187938Semax  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
187938Semax  saved->add_newlines = add_newlines;
187938Semax  if (saved_input == NULL)
187938Semax    saved->saved_input = NULL;
187938Semax  else
187938Semax    {
187938Semax      saved->saved_input = xmalloc (saved_input_len);
187938Semax      memcpy (saved->saved_input, saved_input, saved_input_len);
187938Semax      saved->saved_input_len = saved_input_len;
187938Semax    }
187938Semax#ifdef TC_M68K
187938Semax  saved->scrub_m68k_mri = scrub_m68k_mri;
187938Semax#endif
187938Semax  saved->mri_state = mri_state;
187938Semax  saved->mri_last_ch = mri_last_ch;
187938Semax#if defined TC_ARM && defined OBJ_ELF
187938Semax  saved->symver_state = symver_state;
187938Semax#endif
187938Semax
187938Semax  /* do_scrub_begin() is not useful, just wastes time.  */
187938Semax
187938Semax  state = 0;
187938Semax  saved_input = NULL;
187938Semax
187938Semax  return (char *) saved;
187938Semax}
187938Semax
187938Semaxvoid
187938Semaxapp_pop (arg)
187938Semax     char *arg;
187938Semax{
187938Semax  register struct app_save *saved = (struct app_save *) arg;
187938Semax
187938Semax  /* There is no do_scrub_end ().  */
187938Semax  state = saved->state;
187938Semax  old_state = saved->old_state;
187938Semax  out_string = saved->out_string;
187938Semax  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
187938Semax  add_newlines = saved->add_newlines;
187938Semax  if (saved->saved_input == NULL)
187938Semax    saved_input = NULL;
187938Semax  else
187938Semax    {
187938Semax      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
187938Semax      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
187938Semax      saved_input = input_buffer;
187938Semax      saved_input_len = saved->saved_input_len;
187938Semax      free (saved->saved_input);
187938Semax    }
187938Semax#ifdef TC_M68K
187938Semax  scrub_m68k_mri = saved->scrub_m68k_mri;
187938Semax#endif
187938Semax  mri_state = saved->mri_state;
187938Semax  mri_last_ch = saved->mri_last_ch;
187938Semax#if defined TC_ARM && defined OBJ_ELF
187938Semax  symver_state = saved->symver_state;
187938Semax#endif
187938Semax
187938Semax  free (arg);
187938Semax}				/* app_pop() */
187938Semax
187938Semax/* @@ This assumes that \n &c are the same on host and target.  This is not
187938Semax   necessarily true.  */
/* Return the character denoted by a backslash followed by CH.  The
   letters b, f, n, r and t give the corresponding control
   characters; every other character, including both quote
   characters, stands for itself.  */

static int
process_escape (ch)
     int ch;
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';

  /* The quote characters, like anything else not listed above, map
     to themselves.  */
  return ch;
}
187938Semax
187938Semax/* This function is called to process input characters.  The GET
187938Semax   parameter is used to retrieve more input characters.  GET should
187938Semax   set its parameter to point to a buffer, and return the length of
187938Semax   the buffer; it should return 0 at end of file.  The scrubbed output
187938Semax   characters are put into the buffer starting at TOSTART; the TOSTART
187938Semax   buffer is TOLEN bytes in length.  The function returns the number
187938Semax   of scrubbed characters put into TOSTART.  This will be TOLEN unless
187938Semax   end of file was seen.  This function is arranged as a state
187938Semax   machine, and saves its state so that it may return at any point.
187938Semax   This is the way the old code used to work.  */
187938Semax
187938Semaxint
187938Semaxdo_scrub_chars (get, tostart, tolen)
187938Semax     int (*get) PARAMS ((char *, int));
187938Semax     char *tostart;
187938Semax     int tolen;
187938Semax{
187938Semax  char *to = tostart;
187938Semax  char *toend = tostart + tolen;
187938Semax  char *from;
187938Semax  char *fromend;
187938Semax  int fromlen;
187938Semax  register int ch, ch2 = 0;
187938Semax
187938Semax  /*State 0: beginning of normal line
187938Semax	  1: After first whitespace on line (flush more white)
187938Semax	  2: After first non-white (opcode) on line (keep 1white)
187938Semax	  3: after second white on line (into operands) (flush white)
187938Semax	  4: after putting out a .line, put out digits
187938Semax	  5: parsing a string, then go to old-state
187938Semax	  6: putting out \ escape in a double-quoted string.
187938Semax	  7: After putting out a .appfile, put out string.
187938Semax	  8: After putting out a .appfile string, flush until newline.
187938Semax	  9: After seeing symbol char in state 3 (keep 1white after symchar)
187938Semax	 10: After seeing whitespace in state 9 (keep white before symchar)
187938Semax	 11: After seeing a symbol character in state 0 (eg a label definition)
187938Semax	 -1: output string in out_string and go to the state in old_state
187938Semax	 -2: flush text until a '*' '/' is seen, then go to state old_state
187938Semax#ifdef TC_V850
187938Semax         12: After seeing a dash, looking for a second dash as a start of comment.
187938Semax#endif
187938Semax#ifdef DOUBLEBAR_PARALLEL
187938Semax	 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
187938Semax#endif
187938Semax	  */
187938Semax
187938Semax  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
187938Semax     constructs like ``.loc 1 20''.  This was turning into ``.loc
187938Semax     120''.  States 9 and 10 ensure that a space is never dropped in
187938Semax     between characters which could appear in an identifier.  Ian
187938Semax     Taylor, ian@cygnus.com.
187938Semax
187938Semax     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
187938Semax     correctly on the PA (and any other target where colons are optional).
187938Semax     Jeff Law, law@cs.utah.edu.
187938Semax
187938Semax     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
187938Semax     get squashed into "cmp r1,r2||trap#1", with the all important space
187938Semax     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
187938Semax
187938Semax  /* This macro gets the next input character.  */
187938Semax
187938Semax#define GET()							\
187938Semax  (from < fromend						\
187938Semax   ? * (unsigned char *) (from++)				\
187938Semax   : (saved_input = NULL,					\
187938Semax      fromlen = (*get) (input_buffer, sizeof input_buffer),	\
187938Semax      from = input_buffer,					\
187938Semax      fromend = from + fromlen,					\
187938Semax      (fromlen == 0						\
187938Semax       ? EOF							\
187938Semax       : * (unsigned char *) (from++))))
187938Semax
187938Semax  /* This macro pushes a character back on the input stream.  */
187938Semax
187938Semax#define UNGET(uch) (*--from = (uch))
187938Semax
187938Semax  /* This macro puts a character into the output buffer.  If this
187938Semax     character fills the output buffer, this macro jumps to the label
187938Semax     TOFULL.  We use this rather ugly approach because we need to
187938Semax     handle two different termination conditions: EOF on the input
187938Semax     stream, and a full output buffer.  It would be simpler if we
187938Semax     always read in the entire input stream before processing it, but
187938Semax     I don't want to make such a significant change to the assembler's
187938Semax     memory usage.  */
187938Semax
187938Semax#define PUT(pch)			\
187938Semax  do					\
187938Semax    {					\
187938Semax      *to++ = (pch);			\
187938Semax      if (to >= toend)			\
187938Semax        goto tofull;			\
187938Semax    }					\
187938Semax  while (0)
187938Semax
187938Semax  if (saved_input != NULL)
187938Semax    {
187938Semax      from = saved_input;
187938Semax      fromend = from + saved_input_len;
187938Semax    }
187938Semax  else
187938Semax    {
187938Semax      fromlen = (*get) (input_buffer, sizeof input_buffer);
187938Semax      if (fromlen == 0)
187938Semax	return 0;
187938Semax      from = input_buffer;
187938Semax      fromend = from + fromlen;
187938Semax    }
187938Semax
187938Semax  while (1)
187938Semax    {
187938Semax      /* The cases in this switch end with continue, in order to
187938Semax         branch back to the top of this while loop and generate the
187938Semax         next output character in the appropriate state.  */
187938Semax      switch (state)
187938Semax	{
187938Semax	case -1:
187938Semax	  ch = *out_string++;
187938Semax	  if (*out_string == '\0')
187938Semax	    {
187938Semax	      state = old_state;
187938Semax	      old_state = 3;
187938Semax	    }
187938Semax	  PUT (ch);
187938Semax	  continue;
187938Semax
187938Semax	case -2:
187938Semax	  for (;;)
187938Semax	    {
187938Semax	      do
187938Semax		{
187938Semax		  ch = GET ();
187938Semax
187938Semax		  if (ch == EOF)
187938Semax		    {
187938Semax		      as_warn (_("end of file in comment"));
187938Semax		      goto fromeof;
187938Semax		    }
187938Semax
187938Semax		  if (ch == '\n')
187938Semax		    PUT ('\n');
187938Semax		}
187938Semax	      while (ch != '*');
187938Semax
187938Semax	      while ((ch = GET ()) == '*')
187938Semax		;
187938Semax
187938Semax	      if (ch == EOF)
187938Semax		{
187938Semax		  as_warn (_("end of file in comment"));
187938Semax		  goto fromeof;
187938Semax		}
187938Semax
187938Semax	      if (ch == '/')
187938Semax		break;
187938Semax
187938Semax	      UNGET (ch);
187938Semax	    }
187938Semax
187938Semax	  state = old_state;
187938Semax	  UNGET (' ');
187938Semax	  continue;
187938Semax
187938Semax	case 4:
187938Semax	  ch = GET ();
187938Semax	  if (ch == EOF)
187938Semax	    goto fromeof;
187938Semax	  else if (ch >= '0' && ch <= '9')
187938Semax	    PUT (ch);
187938Semax	  else
187938Semax	    {
187938Semax	      while (ch != EOF && IS_WHITESPACE (ch))
187938Semax		ch = GET ();
187938Semax	      if (ch == '"')
187938Semax		{
187938Semax		  UNGET (ch);
187938Semax		  if (scrub_m68k_mri)
187938Semax		    out_string = "\n\tappfile ";
187938Semax		  else
187938Semax		    out_string = "\n\t.appfile ";
187938Semax		  old_state = 7;
187938Semax		  state = -1;
187938Semax		  PUT (*out_string++);
187938Semax		}
187938Semax	      else
187938Semax		{
187938Semax		  while (ch != EOF && ch != '\n')
187938Semax		    ch = GET ();
187938Semax		  state = 0;
187938Semax		  PUT (ch);
187938Semax		}
187938Semax	    }
187938Semax	  continue;
187938Semax
187938Semax	case 5:
187938Semax	  /* We are going to copy everything up to a quote character,
187938Semax             with special handling for a backslash.  We try to
187938Semax             optimize the copying in the simple case without using the
187938Semax             GET and PUT macros.  */
187938Semax	  {
187938Semax	    char *s;
187938Semax	    int len;
187938Semax
187938Semax	    for (s = from; s < fromend; s++)
187938Semax	      {
187938Semax		ch = *s;
187938Semax		/* This condition must be changed if the type of any
187938Semax                   other character can be LEX_IS_STRINGQUOTE.  */
187938Semax		if (ch == '\\'
187938Semax		    || ch == '"'
187938Semax		    || ch == '\''
187938Semax		    || ch == '\n')
187938Semax		  break;
187938Semax	      }
187938Semax	    len = s - from;
187938Semax	    if (len > toend - to)
187938Semax	      len = toend - to;
187938Semax	    if (len > 0)
187938Semax	      {
187938Semax		memcpy (to, from, len);
187938Semax		to += len;
187938Semax		from += len;
187938Semax	      }
187938Semax	  }
187938Semax
187938Semax	  ch = GET ();
187938Semax	  if (ch == EOF)
187938Semax	    {
187938Semax	      as_warn (_("end of file in string; inserted '\"'"));
187938Semax	      state = old_state;
187938Semax	      UNGET ('\n');
187938Semax	      PUT ('"');
187938Semax	    }
187938Semax	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
187938Semax	    {
305287Sdim	      state = old_state;
187938Semax	      PUT (ch);
187938Semax	    }
187938Semax#ifndef NO_STRING_ESCAPES
187938Semax	  else if (ch == '\\')
187938Semax	    {
187938Semax	      state = 6;
187938Semax	      PUT (ch);
187938Semax	    }
187938Semax#endif
187938Semax	  else if (scrub_m68k_mri && ch == '\n')
187938Semax	    {
187938Semax	      /* Just quietly terminate the string.  This permits lines like
187938Semax		   bne	label	loop if we haven't reach end yet
187938Semax		 */
187938Semax	      state = old_state;
305287Sdim	      UNGET (ch);
187938Semax	      PUT ('\'');
187938Semax	    }
187938Semax	  else
187938Semax	    {
187938Semax	      PUT (ch);
187938Semax	    }
187938Semax	  continue;
187938Semax
187938Semax	case 6:
187938Semax	  state = 5;
187938Semax	  ch = GET ();
187938Semax	  switch (ch)
187938Semax	    {
187938Semax	      /* Handle strings broken across lines, by turning '\n' into
187938Semax		 '\\' and 'n'.  */
187938Semax	    case '\n':
187938Semax	      UNGET ('n');
187938Semax	      add_newlines++;
187938Semax	      PUT ('\\');
187938Semax	      continue;
187938Semax
187938Semax	    case '"':
187938Semax	    case '\\':
187938Semax	    case 'b':
187938Semax	    case 'f':
187938Semax	    case 'n':
187938Semax	    case 'r':
187938Semax	    case 't':
187938Semax	    case 'v':
187938Semax	    case 'x':
187938Semax	    case 'X':
187938Semax	    case '0':
187938Semax	    case '1':
187938Semax	    case '2':
187938Semax	    case '3':
187938Semax	    case '4':
187938Semax	    case '5':
187938Semax	    case '6':
187938Semax	    case '7':
187938Semax	      break;
187938Semax#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
187938Semax	    default:
187938Semax	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
187938Semax	      break;
187938Semax#else  /* ONLY_STANDARD_ESCAPES */
187938Semax	    default:
187938Semax	      /* Accept \x as x for any x */
187938Semax	      break;
187938Semax#endif /* ONLY_STANDARD_ESCAPES */
187938Semax
187938Semax	    case EOF:
187938Semax	      as_warn (_("end of file in string; '\"' inserted"));
187938Semax	      PUT ('"');
187938Semax	      continue;
187938Semax	    }
187938Semax	  PUT (ch);
187938Semax	  continue;
187938Semax
187938Semax	case 7:
187938Semax	  ch = GET ();
187938Semax	  state = 5;
187938Semax	  old_state = 8;
187938Semax	  if (ch == EOF)
187938Semax	    goto fromeof;
187938Semax	  PUT (ch);
187938Semax	  continue;
187938Semax
187938Semax	case 8:
187938Semax	  do
187938Semax	    ch = GET ();
187938Semax	  while (ch != '\n' && ch != EOF);
187938Semax	  if (ch == EOF)
187938Semax	    goto fromeof;
187938Semax	  state = 0;
187938Semax	  PUT (ch);
187938Semax	  continue;
187938Semax	}
187938Semax
187938Semax      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
187938Semax
187938Semax      /* flushchar: */
187938Semax      ch = GET ();
187938Semax
187938Semax    recycle:
187938Semax
187938Semax#if defined TC_ARM && defined OBJ_ELF
187938Semax      /* We need to watch out for .symver directives.  See the comment later
187938Semax	 in this function.  */
187938Semax      if (symver_state == NULL)
187938Semax	{
187938Semax	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
187938Semax	    symver_state = symver_pseudo + 1;
187938Semax	}
187938Semax      else
187938Semax	{
187938Semax	  /* We advance to the next state if we find the right
187938Semax	     character.  */
187938Semax	  if (ch != '\0' && (*symver_state == ch))
187938Semax	    ++symver_state;
187938Semax	  else if (*symver_state != '\0')
187938Semax	    /* We did not get the expected character, or we didn't
187938Semax	       get a valid terminating character after seeing the
187938Semax	       entire pseudo-op, so we must go back to the beginning.  */
187938Semax	    symver_state = NULL;
187938Semax	  else
187938Semax	    {
187938Semax	      /* We've read the entire pseudo-op.  If this is the end
187938Semax		 of the line, go back to the beginning.  */
187938Semax	      if (IS_NEWLINE (ch))
187938Semax		symver_state = NULL;
187938Semax	    }
187938Semax	}
187938Semax#endif /* TC_ARM && OBJ_ELF */
187938Semax
187938Semax#ifdef TC_M68K
187938Semax      /* We want to have pseudo-ops which control whether we are in
187938Semax         MRI mode or not.  Unfortunately, since m68k MRI mode affects
187938Semax         the scrubber, that means that we need a special purpose
187938Semax         recognizer here.  */
187938Semax      if (mri_state == NULL)
187938Semax	{
187938Semax	  if ((state == 0 || state == 1)
187938Semax	      && ch == mri_pseudo[0])
187938Semax	    mri_state = mri_pseudo + 1;
187938Semax	}
187938Semax      else
187938Semax	{
187938Semax	  /* We advance to the next state if we find the right
187938Semax	     character, or if we need a space character and we get any
187938Semax	     whitespace character, or if we need a '0' and we get a
187938Semax	     '1' (this is so that we only need one state to handle
187938Semax	     ``.mri 0'' and ``.mri 1'').  */
187938Semax	  if (ch != '\0'
187938Semax	      && (*mri_state == ch
187938Semax		  || (*mri_state == ' '
187938Semax		      && lex[ch] == LEX_IS_WHITESPACE)
187938Semax		  || (*mri_state == '0'
187938Semax		      && ch == '1')))
187938Semax	    {
187938Semax	      mri_last_ch = ch;
187938Semax	      ++mri_state;
187938Semax	    }
187938Semax	  else if (*mri_state != '\0'
187938Semax		   || (lex[ch] != LEX_IS_WHITESPACE
187938Semax		       && lex[ch] != LEX_IS_NEWLINE))
187938Semax	    {
187938Semax	      /* We did not get the expected character, or we didn't
187938Semax		 get a valid terminating character after seeing the
187938Semax		 entire pseudo-op, so we must go back to the
187938Semax		 beginning.  */
187938Semax	      mri_state = NULL;
187938Semax	    }
187938Semax	  else
187938Semax	    {
187938Semax	      /* We've read the entire pseudo-op.  mri_last_ch is
187938Semax                 either '0' or '1' indicating whether to enter or
187938Semax                 leave MRI mode.  */
187938Semax	      do_scrub_begin (mri_last_ch == '1');
187938Semax	      mri_state = NULL;
187938Semax
187938Semax	      /* We continue handling the character as usual.  The
187938Semax                 main gas reader must also handle the .mri pseudo-op
187938Semax                 to control expression parsing and the like.  */
	    }
	}
#endif

      if (ch == EOF)
	{
	  if (state != 0)
	    {
	      as_warn (_("end of file not at end of a line; newline inserted"));
	      state = 0;
	      PUT ('\n');
	    }
	  goto fromeof;
	}

      switch (lex[ch])
	{
	case LEX_IS_WHITESPACE:
	  do
	    {
	      ch = GET ();
	    }
	  while (ch != EOF && IS_WHITESPACE (ch));
	  if (ch == EOF)
	    goto fromeof;

	  if (state == 0)
	    {
	      /* Preserve a single whitespace character at the
		 beginning of a line.  */
	      state = 1;
	      UNGET (ch);
	      PUT (' ');
	      break;
	    }

#ifdef KEEP_WHITE_AROUND_COLON
	  if (lex[ch] == LEX_IS_COLON)
	    {
	      /* Only keep this white if there's no white *after* the
                 colon.  */
	      ch2 = GET ();
	      UNGET (ch2);
	      if (!IS_WHITESPACE (ch2))
		{
		  state = 9;
		  UNGET (ch);
		  PUT (' ');
		  break;
		}
	    }
#endif
	  if (IS_COMMENT (ch)
	      || ch == '/'
	      || IS_LINE_SEPARATOR (ch)
	      || IS_PARALLEL_SEPARATOR (ch))
	    {
	      if (scrub_m68k_mri)
		{
		  /* In MRI mode, we keep these spaces.  */
		  UNGET (ch);
		  PUT (' ');
		  break;
		}
	      goto recycle;
	    }

	  /* If we're in state 2 or 11, we've seen a non-white
	     character followed by whitespace.  If the next character
	     is ':', this is whitespace after a label name which we
	     normally must ignore.  In MRI mode, though, spaces are
	     not permitted between the label and the colon.  */
	  if ((state == 2 || state == 11)
	      && lex[ch] == LEX_IS_COLON
	      && ! scrub_m68k_mri)
	    {
	      state = 1;
	      PUT (ch);
	      break;
	    }

	  switch (state)
	    {
	    case 0:
	      state++;
	      goto recycle;	/* Punted leading sp */
	    case 1:
	      /* We can arrive here if we leave a leading whitespace
		 character at the beginning of a line.  */
	      goto recycle;
	    case 2:
	      state = 3;
	      if (to + 1 < toend)
		{
		  /* Optimize common case by skipping UNGET/GET.  */
		  PUT (' ');	/* Sp after opco */
		  goto recycle;
		}
	      UNGET (ch);
	      PUT (' ');
	      break;
	    case 3:
	      if (scrub_m68k_mri)
		{
		  /* In MRI mode, we keep these spaces.  */
		  UNGET (ch);
		  PUT (' ');
		  break;
		}
	      goto recycle;	/* Sp in operands */
	    case 9:
	    case 10:
	      if (scrub_m68k_mri)
		{
		  /* In MRI mode, we keep these spaces.  */
		  state = 3;
		  UNGET (ch);
		  PUT (' ');
		  break;
		}
	      state = 10;	/* Sp after symbol char */
	      goto recycle;
	    case 11:
	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
		state = 1;
	      else
		{
		  /* We know that ch is not ':', since we tested that
                     case above.  Therefore this is not a label, so it
                     must be the opcode, and we've just seen the
                     whitespace after it.  */
		  state = 3;
		}
	      UNGET (ch);
	      PUT (' ');	/* Sp after label definition.  */
	      break;
	    default:
	      BAD_CASE (state);
	    }
	  break;

	case LEX_IS_TWOCHAR_COMMENT_1ST:
	  ch2 = GET ();
	  if (ch2 == '*')
	    {
	      for (;;)
		{
		  do
		    {
		      ch2 = GET ();
		      if (ch2 != EOF && IS_NEWLINE (ch2))
			add_newlines++;
		    }
		  while (ch2 != EOF && ch2 != '*');

		  while (ch2 == '*')
		    ch2 = GET ();

		  if (ch2 == EOF || ch2 == '/')
		    break;

		  /* This UNGET will ensure that we count newlines
                     correctly.  */
		  UNGET (ch2);
		}

	      if (ch2 == EOF)
		as_warn (_("end of file in multiline comment"));

	      ch = ' ';
	      goto recycle;
	    }
#ifdef DOUBLESLASH_LINE_COMMENTS
	  else if (ch2 == '/')
	    {
	      do
		{
		  ch = GET ();
		}
	      while (ch != EOF && !IS_NEWLINE (ch));
	      if (ch == EOF)
		as_warn ("end of file in comment; newline inserted");
	      state = 0;
	      PUT ('\n');
	      break;
	    }
#endif
	  else
	    {
	      if (ch2 != EOF)
		UNGET (ch2);
	      if (state == 9 || state == 10)
		state = 3;
	      PUT (ch);
	    }
	  break;

	case LEX_IS_STRINGQUOTE:
	  if (state == 10)
	    {
	      /* Preserve the whitespace in foo "bar" */
	      UNGET (ch);
	      state = 3;
	      PUT (' ');

	      /* PUT didn't jump out.  We could just break, but we
                 know what will happen, so optimize a bit.  */
	      ch = GET ();
	      old_state = 3;
	    }
	  else if (state == 9)
	    old_state = 3;
	  else
	    old_state = state;
	  state = 5;
	  PUT (ch);
	  break;

#ifndef IEEE_STYLE
	case LEX_IS_ONECHAR_QUOTE:
	  if (state == 10)
	    {
	      /* Preserve the whitespace in foo 'b' */
	      UNGET (ch);
	      state = 3;
	      PUT (' ');
	      break;
	    }
	  ch = GET ();
	  if (ch == EOF)
	    {
	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
	      ch = 0;
	    }
	  if (ch == '\\')
	    {
	      ch = GET ();
	      if (ch == EOF)
		{
		  as_warn (_("end of file in escape character"));
		  ch = '\\';
		}
	      else
		ch = process_escape (ch);
	    }
	  sprintf (out_buf, "%d", (int) (unsigned char) ch);

	  /* None of these 'x constants for us.  We want 'x'.  */
	  if ((ch = GET ()) != '\'')
	    {
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	      as_warn (_("missing close quote; (assumed)"));
#else
	      if (ch != EOF)
		UNGET (ch);
#endif
	    }
	  if (strlen (out_buf) == 1)
	    {
	      PUT (out_buf[0]);
	      break;
	    }
	  if (state == 9)
	    old_state = 3;
	  else
	    old_state = state;
	  state = -1;
	  out_string = out_buf;
	  PUT (*out_string++);
	  break;
#endif

	case LEX_IS_COLON:
#ifdef KEEP_WHITE_AROUND_COLON
	  state = 9;
#else
	  if (state == 9 || state == 10)
	    state = 3;
	  else if (state != 3)
	    state = 1;
#endif
	  PUT (ch);
	  break;

	case LEX_IS_NEWLINE:
	  /* Roll out a bunch of newlines from inside comments, etc.  */
	  if (add_newlines)
	    {
	      --add_newlines;
	      UNGET (ch);
	    }
	  /* Fall through.  */

	case LEX_IS_LINE_SEPARATOR:
	  state = 0;
	  PUT (ch);
	  break;

	case LEX_IS_PARALLEL_SEPARATOR:
	  state = 1;
	  PUT (ch);
	  break;

#ifdef TC_V850
	case LEX_IS_DOUBLEDASH_1ST:
	  ch2 = GET ();
	  if (ch2 != '-')
	    {
	      UNGET (ch2);
	      goto de_fault;
	    }
	  /* Read and skip to end of line.  */
	  do
	    {
	      ch = GET ();
	    }
	  while (ch != EOF && ch != '\n');
	  if (ch == EOF)
	    {
	      as_warn (_("end of file in comment; newline inserted"));
	    }
	  state = 0;
	  PUT ('\n');
	  break;
#endif
#ifdef DOUBLEBAR_PARALLEL
	case LEX_IS_DOUBLEBAR_1ST:
	  ch2 = GET ();
	  if (ch2 != '|')
	    {
	      UNGET (ch2);
	      goto de_fault;
	    }
	  /* Reset back to state 1 and pretend that we are parsing a line from
	     just after the first white space.  */
	  state = 1;
	  PUT ('|');
	  PUT ('|');
	  break;
#endif
	case LEX_IS_LINE_COMMENT_START:
	  /* FIXME-someday: The two character comment stuff was badly
	     thought out.  On i386, we want '/' as line comment start
	     AND we want C style comments.  hence this hack.  The
	     whole lexical process should be reworked.  xoxorich.  */
	  if (ch == '/')
	    {
	      ch2 = GET ();
	      if (ch2 == '*')
		{
		  old_state = 3;
		  state = -2;
		  break;
		}
	      else
		{
		  UNGET (ch2);
		}
	    } /* bad hack */

	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
	    {
	      int startch;

	      startch = ch;

	      do
		{
		  ch = GET ();
		}
	      while (ch != EOF && IS_WHITESPACE (ch));
	      if (ch == EOF)
		{
		  as_warn (_("end of file in comment; newline inserted"));
		  PUT ('\n');
		  break;
		}
	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
		{
		  /* Not a cpp line.  */
		  while (ch != EOF && !IS_NEWLINE (ch))
		    ch = GET ();
		  if (ch == EOF)
		    as_warn (_("end of file in comment; newline inserted"));
		  state = 0;
		  PUT ('\n');
		  break;
		}
	      /* Looks like `# 123 "filename"' from cpp.  */
	      UNGET (ch);
	      old_state = 4;
	      state = -1;
	      if (scrub_m68k_mri)
		out_string = "\tappline ";
	      else
		out_string = "\t.appline ";
	      PUT (*out_string++);
	      break;
	    }

#ifdef TC_D10V
	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
	     Trap is the only short insn that has a first operand that is
	     neither register nor label.
	     We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
	     already LEX_IS_LINE_COMMENT_START.  However, it is the
	     only character in line_comment_chars for d10v, hence we
	     can recognize it as such.  */
	  /* An alternative approach would be to reset the state to 1 when
	     we see '||', '<-' or '->', but that seems to be overkill.  */
	  if (state == 10)
	    PUT (' ');
#endif
	  /* We have a line comment character which is not at the
	     start of a line.  If this is also a normal comment
	     character, fall through.  Otherwise treat it as a default
	     character.  */
	  if (strchr (tc_comment_chars, ch) == NULL
	      && (! scrub_m68k_mri
		  || (ch != '!' && ch != '*')))
	    goto de_fault;
	  if (scrub_m68k_mri
	      && (ch == '!' || ch == '*' || ch == '#')
	      && state != 1
	      && state != 10)
	    goto de_fault;
	  /* Fall through.  */
	case LEX_IS_COMMENT_START:
#if defined TC_ARM && defined OBJ_ELF
	  /* On the ARM, `@' is the comment character.
	     Unfortunately this is also a special character in ELF .symver
	     directives (and .type, though we deal with those another way).
	     So we check if this line is such a directive, and treat
	     the character as default if so.  This is a hack.  */
	  if ((symver_state != NULL) && (*symver_state == 0))
	    goto de_fault;
#endif
#ifdef WARN_COMMENTS
	  if (!found_comment)
	    as_where (&found_comment_file, &found_comment);
#endif
	  do
	    {
	      ch = GET ();
	    }
	  while (ch != EOF && !IS_NEWLINE (ch));
	  if (ch == EOF)
	    as_warn (_("end of file in comment; newline inserted"));
	  state = 0;
	  PUT ('\n');
	  break;

	case LEX_IS_SYMBOL_COMPONENT:
	  if (state == 10)
	    {
	      /* This is a symbol character following another symbol
		 character, with whitespace in between.  We skipped
		 the whitespace earlier, so output it now.  */
	      UNGET (ch);
	      state = 3;
	      PUT (' ');
	      break;
	    }

	  if (state == 3)
	    state = 9;

	  /* This is a common case.  Quickly copy CH and all the
             following symbol component or normal characters.  */
	  if (to + 1 < toend
	      && mri_state == NULL
#if defined TC_ARM && defined OBJ_ELF
	      && symver_state == NULL
#endif
	      )
	    {
	      char *s;
	      int len;

	      for (s = from; s < fromend; s++)
		{
		  int type;

		  ch2 = *(unsigned char *) s;
		  type = lex[ch2];
		  if (type != 0
		      && type != LEX_IS_SYMBOL_COMPONENT)
		    break;
		}
	      if (s > from)
		{
		  /* Handle the last character normally, for
                     simplicity.  */
		  --s;
		}
	      len = s - from;
	      if (len > (toend - to) - 1)
		len = (toend - to) - 1;
	      if (len > 0)
		{
		  PUT (ch);
		  if (len > 8)
		    {
		      memcpy (to, from, len);
		      to += len;
		      from += len;
		    }
		  else
		    {
		      switch (len)
			{
			case 8: *to++ = *from++;
			case 7: *to++ = *from++;
			case 6: *to++ = *from++;
			case 5: *to++ = *from++;
			case 4: *to++ = *from++;
			case 3: *to++ = *from++;
			case 2: *to++ = *from++;
			case 1: *to++ = *from++;
			}
		    }
		  ch = GET ();
		}
	    }

	  /* Fall through.  */
	default:
	de_fault:
	  /* Some relatively `normal' character.  */
	  if (state == 0)
	    {
	      state = 11;	/* Now seeing label definition */
	    }
	  else if (state == 1)
	    {
	      state = 2;	/* Ditto */
	    }
	  else if (state == 9)
	    {
	      if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
		state = 3;
	    }
	  else if (state == 10)
	    {
	      if (ch == '\\')
		{
		  /* Special handling for backslash: a backslash may
		     be the beginning of a formal parameter (of a
		     macro) following another symbol character, with
		     whitespace in between.  If that is the case, we
		     output a space before the parameter.  Strictly
		     speaking, correct handling depends upon what the
		     macro parameter expands into; if the parameter
		     expands into something which does not start with
		     an operand character, then we don't want to keep
		     the space.  We don't have enough information to
		     make the right choice, so here we are making the
		     choice which is more likely to be correct.  */
		  PUT (' ');
		}

	      state = 3;
	    }
	  PUT (ch);
	  break;
	}
    }

  /*NOTREACHED*/

 fromeof:
  /* We have reached the end of the input.  */
  return to - tostart;

 tofull:
  /* The output buffer is full.  Save any input we have not yet
     processed.  */
  if (fromend > from)
    {
      saved_input = from;
      saved_input_len = fromend - from;
    }
  else
    saved_input = NULL;

  return to - tostart;
}

/* end of app.c */