app.c revision 78828
133965Sjdp/* This is the Assembler Pre-Processor
278828Sobrien   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
378828Sobrien   1999, 2000
433965Sjdp   Free Software Foundation, Inc.
533965Sjdp
633965Sjdp   This file is part of GAS, the GNU Assembler.
733965Sjdp
833965Sjdp   GAS is free software; you can redistribute it and/or modify
933965Sjdp   it under the terms of the GNU General Public License as published by
1033965Sjdp   the Free Software Foundation; either version 2, or (at your option)
1133965Sjdp   any later version.
1233965Sjdp
1333965Sjdp   GAS is distributed in the hope that it will be useful,
1433965Sjdp   but WITHOUT ANY WARRANTY; without even the implied warranty of
1533965Sjdp   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1633965Sjdp   GNU General Public License for more details.
1733965Sjdp
1833965Sjdp   You should have received a copy of the GNU General Public License
1933965Sjdp   along with GAS; see the file COPYING.  If not, write to the Free
2033965Sjdp   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2133965Sjdp   02111-1307, USA.  */
2233965Sjdp
2333965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
2433965Sjdp/* App, the assembler pre-processor.  This pre-processor strips out excess
2533965Sjdp   spaces, turns single-quoted characters into a decimal constant, and turns
2633965Sjdp   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
2733965Sjdp   pair.  This needs better error-handling.  */
2833965Sjdp
2933965Sjdp#include <stdio.h>
3033965Sjdp#include "as.h"			/* For BAD_CASE() only */
3133965Sjdp
/* Pre-ANSI compilers do not know `const'; make it vanish for them
   unless somebody has already defined it.  */
#if (__STDC__ != 1) && ! defined (const)
#define const
#endif

#if defined (TC_M68K)
/* Nonzero while the scrubber itself is operating in m68k MRI mode.
   This is kept separate from flag_m68k_mri because the .mri pseudo-op
   affects the two flags at different times.  */
static int scrub_m68k_mri;
#else
/* Targets other than the m68k never scrub in MRI mode.  */
#define scrub_m68k_mri 0
#endif

/* Text of the pseudo-op that switches MRI mode on and off.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";

#if defined (TC_ARM) && defined (OBJ_ELF)
/* Pseudo-op for which `@' characters need special treatment, and the
   current position of its recognizer.  See the comment in
   do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";
static const char *symver_state;
#endif

/* Character-class table, indexed by character value; filled in by
   do_scrub_begin with the LEX_IS_* codes below.  */
static char lex[256];

/* Characters that are always symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  Zero means "no special class".  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11

/* Target-specific classes.  */
#if defined (TC_V850)
#define LEX_IS_DOUBLEDASH_1ST 12
#endif

#if defined (TC_M32R)
#define DOUBLEBAR_PARALLEL
#endif

#if defined (DOUBLEBAR_PARALLEL)
#define LEX_IS_DOUBLEBAR_1ST 13
#endif

/* Class predicates.  The argument is used directly as an index into
   lex[], so it must be a valid (non-negative) character value.  */
#define IS_SYMBOL_COMPONENT(ch) (lex[ch] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(ch) (lex[ch] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(ch) (lex[ch] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(ch) (lex[ch] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(ch) (lex[ch] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(ch) (lex[ch] == LEX_IS_NEWLINE)
8733965Sjdp
/* Forward declaration; process_escape is defined further down in this
   file.  */
8833965Sjdpstatic int process_escape PARAMS ((int));
8933965Sjdp
9033965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be
9133965Sjdp   built statically at compile time rather than dynamically
9277298Sobrien   each and every time the assembler is run.  xoxorich.  */
9333965Sjdp
9477298Sobrienvoid
9533965Sjdpdo_scrub_begin (m68k_mri)
9660484Sobrien     int m68k_mri ATTRIBUTE_UNUSED;
9733965Sjdp{
9833965Sjdp  const char *p;
9960484Sobrien  int c;
10033965Sjdp
10133965Sjdp  lex[' '] = LEX_IS_WHITESPACE;
10233965Sjdp  lex['\t'] = LEX_IS_WHITESPACE;
10338889Sjdp  lex['\r'] = LEX_IS_WHITESPACE;
10433965Sjdp  lex['\n'] = LEX_IS_NEWLINE;
10533965Sjdp  lex[':'] = LEX_IS_COLON;
10633965Sjdp
10760484Sobrien#ifdef TC_M68K
10860484Sobrien  scrub_m68k_mri = m68k_mri;
10960484Sobrien
11033965Sjdp  if (! m68k_mri)
11160484Sobrien#endif
11233965Sjdp    {
11333965Sjdp      lex['"'] = LEX_IS_STRINGQUOTE;
11433965Sjdp
11560484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370)
11660484Sobrien      /* I370 uses single-quotes to delimit integer, float constants */
11733965Sjdp      lex['\''] = LEX_IS_ONECHAR_QUOTE;
11833965Sjdp#endif
11933965Sjdp
12033965Sjdp#ifdef SINGLE_QUOTE_STRINGS
12133965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
12233965Sjdp#endif
12333965Sjdp    }
12433965Sjdp
12533965Sjdp  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
12633965Sjdp     in state 5 of do_scrub_chars must be changed.  */
12733965Sjdp
12833965Sjdp  /* Note that these override the previous defaults, e.g. if ';' is a
12933965Sjdp     comment char, then it isn't a line separator.  */
13033965Sjdp  for (p = symbol_chars; *p; ++p)
13133965Sjdp    {
13233965Sjdp      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
13333965Sjdp    }				/* declare symbol characters */
13433965Sjdp
13560484Sobrien  for (c = 128; c < 256; ++c)
13660484Sobrien    lex[c] = LEX_IS_SYMBOL_COMPONENT;
13760484Sobrien
13860484Sobrien#ifdef tc_symbol_chars
13960484Sobrien  /* This macro permits the processor to specify all characters which
14060484Sobrien     may appears in an operand.  This will prevent the scrubber from
14160484Sobrien     discarding meaningful whitespace in certain cases.  The i386
14260484Sobrien     backend uses this to support prefixes, which can confuse the
14360484Sobrien     scrubber as to whether it is parsing operands or opcodes.  */
14460484Sobrien  for (p = tc_symbol_chars; *p; ++p)
14560484Sobrien    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
14660484Sobrien#endif
14760484Sobrien
14833965Sjdp  /* The m68k backend wants to be able to change comment_chars.  */
14933965Sjdp#ifndef tc_comment_chars
15033965Sjdp#define tc_comment_chars comment_chars
15133965Sjdp#endif
15233965Sjdp  for (p = tc_comment_chars; *p; p++)
15333965Sjdp    {
15433965Sjdp      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
15533965Sjdp    }				/* declare comment chars */
15633965Sjdp
15733965Sjdp  for (p = line_comment_chars; *p; p++)
15833965Sjdp    {
15933965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
16033965Sjdp    }				/* declare line comment chars */
16133965Sjdp
16233965Sjdp  for (p = line_separator_chars; *p; p++)
16333965Sjdp    {
16433965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
16533965Sjdp    }				/* declare line separators */
16633965Sjdp
16733965Sjdp  /* Only allow slash-star comments if slash is not in use.
16833965Sjdp     FIXME: This isn't right.  We should always permit them.  */
16933965Sjdp  if (lex['/'] == 0)
17033965Sjdp    {
17133965Sjdp      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
17233965Sjdp    }
17333965Sjdp
17460484Sobrien#ifdef TC_M68K
17533965Sjdp  if (m68k_mri)
17633965Sjdp    {
17733965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
17833965Sjdp      lex[';'] = LEX_IS_COMMENT_START;
17933965Sjdp      lex['*'] = LEX_IS_LINE_COMMENT_START;
18033965Sjdp      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
18133965Sjdp         then it can't be used in an expression.  */
18233965Sjdp      lex['!'] = LEX_IS_LINE_COMMENT_START;
18333965Sjdp    }
18460484Sobrien#endif
18538889Sjdp
18638889Sjdp#ifdef TC_V850
18738889Sjdp  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
18838889Sjdp#endif
18960484Sobrien#ifdef DOUBLEBAR_PARALLEL
19038889Sjdp  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
19138889Sjdp#endif
19260484Sobrien#ifdef TC_D30V
19360484Sobrien  /* must do this is we want VLIW instruction with "->" or "<-" */
19460484Sobrien  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
19560484Sobrien#endif
19633965Sjdp}				/* do_scrub_begin() */
19733965Sjdp
/* State the scrubber keeps between calls to do_scrub_chars, so that it
   can return at any point and pick up again where it stopped.  */

/* Current state of the state machine, and the state to go back to.  */
static int state;
static int old_state;

/* String being output in state -1, and scratch space for one.  */
static char *out_string;
static char out_buf[20];

/* Count of newlines still to be added.  */
static int add_newlines;

/* Input that has been read but not yet scrubbed (NULL if none), its
   length, and the buffer input is read into.  */
static char *saved_input;
static int saved_input_len;
static char input_buffer[32 * 1024];

/* Position of the ``.mri'' recognizer within mri_pseudo (NULL while it
   is idle), and the last character it matched.  */
static const char *mri_state;
static char mri_last_ch;
20933965Sjdp
21033965Sjdp/* Data structure for saving the state of app across #include's.  Note that
21133965Sjdp   app is called asynchronously to the parsing of the .include's, so our
21233965Sjdp   state at the time .include is interpreted is completely unrelated.
21333965Sjdp   That's why we have to save it all.  */
21433965Sjdp
21577298Sobrienstruct app_save {
21677298Sobrien  int          state;
21777298Sobrien  int          old_state;
21877298Sobrien  char *       out_string;
21977298Sobrien  char         out_buf[sizeof (out_buf)];
22077298Sobrien  int          add_newlines;
22177298Sobrien  char *       saved_input;
22277298Sobrien  int          saved_input_len;
22360484Sobrien#ifdef TC_M68K
22477298Sobrien  int          scrub_m68k_mri;
22560484Sobrien#endif
22677298Sobrien  const char * mri_state;
22777298Sobrien  char         mri_last_ch;
22860484Sobrien#if defined TC_ARM && defined OBJ_ELF
22977298Sobrien  const char * symver_state;
23060484Sobrien#endif
23177298Sobrien};
23233965Sjdp
23333965Sjdpchar *
23433965Sjdpapp_push ()
23533965Sjdp{
23633965Sjdp  register struct app_save *saved;
23733965Sjdp
23833965Sjdp  saved = (struct app_save *) xmalloc (sizeof (*saved));
23933965Sjdp  saved->state = state;
24033965Sjdp  saved->old_state = old_state;
24133965Sjdp  saved->out_string = out_string;
24233965Sjdp  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
24333965Sjdp  saved->add_newlines = add_newlines;
24460484Sobrien  if (saved_input == NULL)
24560484Sobrien    saved->saved_input = NULL;
24660484Sobrien  else
24760484Sobrien    {
24860484Sobrien      saved->saved_input = xmalloc (saved_input_len);
24960484Sobrien      memcpy (saved->saved_input, saved_input, saved_input_len);
25060484Sobrien      saved->saved_input_len = saved_input_len;
25160484Sobrien    }
25260484Sobrien#ifdef TC_M68K
25333965Sjdp  saved->scrub_m68k_mri = scrub_m68k_mri;
25460484Sobrien#endif
25533965Sjdp  saved->mri_state = mri_state;
25633965Sjdp  saved->mri_last_ch = mri_last_ch;
25760484Sobrien#if defined TC_ARM && defined OBJ_ELF
25860484Sobrien  saved->symver_state = symver_state;
25960484Sobrien#endif
26033965Sjdp
26177298Sobrien  /* do_scrub_begin() is not useful, just wastes time.  */
26233965Sjdp
26333965Sjdp  state = 0;
26433965Sjdp  saved_input = NULL;
26533965Sjdp
26633965Sjdp  return (char *) saved;
26733965Sjdp}
26833965Sjdp
26977298Sobrienvoid
27033965Sjdpapp_pop (arg)
27133965Sjdp     char *arg;
27233965Sjdp{
27333965Sjdp  register struct app_save *saved = (struct app_save *) arg;
27433965Sjdp
27577298Sobrien  /* There is no do_scrub_end ().  */
27633965Sjdp  state = saved->state;
27733965Sjdp  old_state = saved->old_state;
27833965Sjdp  out_string = saved->out_string;
27933965Sjdp  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
28033965Sjdp  add_newlines = saved->add_newlines;
28160484Sobrien  if (saved->saved_input == NULL)
28260484Sobrien    saved_input = NULL;
28360484Sobrien  else
28460484Sobrien    {
28560484Sobrien      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
28660484Sobrien      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
28760484Sobrien      saved_input = input_buffer;
28860484Sobrien      saved_input_len = saved->saved_input_len;
28960484Sobrien      free (saved->saved_input);
29060484Sobrien    }
29160484Sobrien#ifdef TC_M68K
29233965Sjdp  scrub_m68k_mri = saved->scrub_m68k_mri;
29360484Sobrien#endif
29433965Sjdp  mri_state = saved->mri_state;
29533965Sjdp  mri_last_ch = saved->mri_last_ch;
29660484Sobrien#if defined TC_ARM && defined OBJ_ELF
29760484Sobrien  symver_state = saved->symver_state;
29860484Sobrien#endif
29933965Sjdp
30033965Sjdp  free (arg);
30133965Sjdp}				/* app_pop() */
30233965Sjdp
/* Return the character denoted by the escape sequence backslash-CH.

   Only `b', `f', `n', `r' and `t' are translated, to the matching
   control character; every other value of CH, the two quote characters
   included, is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  static const char letters[] = { 'b', 'f', 'n', 'r', 't' };
  static const char controls[] = { '\b', '\f', '\n', '\r', '\t' };
  unsigned int i;

  for (i = 0; i < sizeof (letters); i++)
    if (ch == letters[i])
      return controls[i];

  /* Anything else stands for itself.  */
  return ch;
}
32933965Sjdp
33033965Sjdp/* This function is called to process input characters.  The GET
33133965Sjdp   parameter is used to retrieve more input characters.  GET should
33233965Sjdp   set its parameter to point to a buffer, and return the length of
33333965Sjdp   the buffer; it should return 0 at end of file.  The scrubbed output
33433965Sjdp   characters are put into the buffer starting at TOSTART; the TOSTART
33533965Sjdp   buffer is TOLEN bytes in length.  The function returns the number
33633965Sjdp   of scrubbed characters put into TOSTART.  This will be TOLEN unless
33733965Sjdp   end of file was seen.  This function is arranged as a state
33833965Sjdp   machine, and saves its state so that it may return at any point.
33933965Sjdp   This is the way the old code used to work.  */
34033965Sjdp
34133965Sjdpint
34233965Sjdpdo_scrub_chars (get, tostart, tolen)
34360484Sobrien     int (*get) PARAMS ((char *, int));
34433965Sjdp     char *tostart;
34533965Sjdp     int tolen;
34633965Sjdp{
34733965Sjdp  char *to = tostart;
34833965Sjdp  char *toend = tostart + tolen;
34933965Sjdp  char *from;
35033965Sjdp  char *fromend;
35133965Sjdp  int fromlen;
35233965Sjdp  register int ch, ch2 = 0;
35333965Sjdp
35433965Sjdp  /*State 0: beginning of normal line
35533965Sjdp	  1: After first whitespace on line (flush more white)
35633965Sjdp	  2: After first non-white (opcode) on line (keep 1white)
35733965Sjdp	  3: after second white on line (into operands) (flush white)
35833965Sjdp	  4: after putting out a .line, put out digits
35933965Sjdp	  5: parsing a string, then go to old-state
36033965Sjdp	  6: putting out \ escape in a "d string.
36133965Sjdp	  7: After putting out a .appfile, put out string.
36233965Sjdp	  8: After putting out a .appfile string, flush until newline.
36333965Sjdp	  9: After seeing symbol char in state 3 (keep 1white after symchar)
36433965Sjdp	 10: After seeing whitespace in state 9 (keep white before symchar)
36533965Sjdp	 11: After seeing a symbol character in state 0 (eg a label definition)
36633965Sjdp	 -1: output string in out_string and go to the state in old_state
36733965Sjdp	 -2: flush text until a '*' '/' is seen, then go to state old_state
36838889Sjdp#ifdef TC_V850
36938889Sjdp         12: After seeing a dash, looking for a second dash as a start of comment.
37038889Sjdp#endif
37160484Sobrien#ifdef DOUBLEBAR_PARALLEL
37238889Sjdp	 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator.
37338889Sjdp#endif
37433965Sjdp	  */
37533965Sjdp
37633965Sjdp  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
37733965Sjdp     constructs like ``.loc 1 20''.  This was turning into ``.loc
37833965Sjdp     120''.  States 9 and 10 ensure that a space is never dropped in
37933965Sjdp     between characters which could appear in a identifier.  Ian
38033965Sjdp     Taylor, ian@cygnus.com.
38133965Sjdp
38233965Sjdp     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
38333965Sjdp     correctly on the PA (and any other target where colons are optional).
38438889Sjdp     Jeff Law, law@cs.utah.edu.
38533965Sjdp
38638889Sjdp     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
38738889Sjdp     get squashed into "cmp r1,r2||trap#1", with the all important space
38838889Sjdp     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
38938889Sjdp
39033965Sjdp  /* This macro gets the next input character.  */
39133965Sjdp
39260484Sobrien#define GET()							\
39360484Sobrien  (from < fromend						\
39460484Sobrien   ? * (unsigned char *) (from++)				\
39560484Sobrien   : (saved_input = NULL,					\
39660484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer),	\
39760484Sobrien      from = input_buffer,					\
39860484Sobrien      fromend = from + fromlen,					\
39960484Sobrien      (fromlen == 0						\
40060484Sobrien       ? EOF							\
40160484Sobrien       : * (unsigned char *) (from++))))
40233965Sjdp
40333965Sjdp  /* This macro pushes a character back on the input stream.  */
40433965Sjdp
40533965Sjdp#define UNGET(uch) (*--from = (uch))
40633965Sjdp
40733965Sjdp  /* This macro puts a character into the output buffer.  If this
40833965Sjdp     character fills the output buffer, this macro jumps to the label
40933965Sjdp     TOFULL.  We use this rather ugly approach because we need to
41033965Sjdp     handle two different termination conditions: EOF on the input
41133965Sjdp     stream, and a full output buffer.  It would be simpler if we
41233965Sjdp     always read in the entire input stream before processing it, but
41333965Sjdp     I don't want to make such a significant change to the assembler's
41433965Sjdp     memory usage.  */
41533965Sjdp
41633965Sjdp#define PUT(pch)			\
41733965Sjdp  do					\
41833965Sjdp    {					\
41933965Sjdp      *to++ = (pch);			\
42033965Sjdp      if (to >= toend)			\
42133965Sjdp        goto tofull;			\
42233965Sjdp    }					\
42333965Sjdp  while (0)
42433965Sjdp
42533965Sjdp  if (saved_input != NULL)
42633965Sjdp    {
42733965Sjdp      from = saved_input;
42833965Sjdp      fromend = from + saved_input_len;
42933965Sjdp    }
43033965Sjdp  else
43133965Sjdp    {
43260484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer);
43333965Sjdp      if (fromlen == 0)
43433965Sjdp	return 0;
43560484Sobrien      from = input_buffer;
43633965Sjdp      fromend = from + fromlen;
43733965Sjdp    }
43833965Sjdp
43933965Sjdp  while (1)
44033965Sjdp    {
44133965Sjdp      /* The cases in this switch end with continue, in order to
44233965Sjdp         branch back to the top of this while loop and generate the
44333965Sjdp         next output character in the appropriate state.  */
44433965Sjdp      switch (state)
44533965Sjdp	{
44633965Sjdp	case -1:
44733965Sjdp	  ch = *out_string++;
44833965Sjdp	  if (*out_string == '\0')
44933965Sjdp	    {
45033965Sjdp	      state = old_state;
45133965Sjdp	      old_state = 3;
45233965Sjdp	    }
45333965Sjdp	  PUT (ch);
45433965Sjdp	  continue;
45533965Sjdp
45633965Sjdp	case -2:
45733965Sjdp	  for (;;)
45833965Sjdp	    {
45933965Sjdp	      do
46033965Sjdp		{
46133965Sjdp		  ch = GET ();
46233965Sjdp
46333965Sjdp		  if (ch == EOF)
46433965Sjdp		    {
46560484Sobrien		      as_warn (_("end of file in comment"));
46633965Sjdp		      goto fromeof;
46733965Sjdp		    }
46833965Sjdp
46933965Sjdp		  if (ch == '\n')
47033965Sjdp		    PUT ('\n');
47133965Sjdp		}
47233965Sjdp	      while (ch != '*');
47333965Sjdp
47433965Sjdp	      while ((ch = GET ()) == '*')
47533965Sjdp		;
47633965Sjdp
47733965Sjdp	      if (ch == EOF)
47833965Sjdp		{
47960484Sobrien		  as_warn (_("end of file in comment"));
48033965Sjdp		  goto fromeof;
48133965Sjdp		}
48233965Sjdp
48333965Sjdp	      if (ch == '/')
48433965Sjdp		break;
48533965Sjdp
48633965Sjdp	      UNGET (ch);
48733965Sjdp	    }
48833965Sjdp
48933965Sjdp	  state = old_state;
49033965Sjdp	  UNGET (' ');
49133965Sjdp	  continue;
49233965Sjdp
49333965Sjdp	case 4:
49433965Sjdp	  ch = GET ();
49533965Sjdp	  if (ch == EOF)
49633965Sjdp	    goto fromeof;
49733965Sjdp	  else if (ch >= '0' && ch <= '9')
49833965Sjdp	    PUT (ch);
49933965Sjdp	  else
50033965Sjdp	    {
50133965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch))
50233965Sjdp		ch = GET ();
50333965Sjdp	      if (ch == '"')
50433965Sjdp		{
50533965Sjdp		  UNGET (ch);
50633965Sjdp		  if (scrub_m68k_mri)
50733965Sjdp		    out_string = "\n\tappfile ";
50833965Sjdp		  else
50933965Sjdp		    out_string = "\n\t.appfile ";
51033965Sjdp		  old_state = 7;
51133965Sjdp		  state = -1;
51233965Sjdp		  PUT (*out_string++);
51333965Sjdp		}
51433965Sjdp	      else
51533965Sjdp		{
51633965Sjdp		  while (ch != EOF && ch != '\n')
51733965Sjdp		    ch = GET ();
51833965Sjdp		  state = 0;
51933965Sjdp		  PUT (ch);
52033965Sjdp		}
52133965Sjdp	    }
52233965Sjdp	  continue;
52333965Sjdp
52433965Sjdp	case 5:
52533965Sjdp	  /* We are going to copy everything up to a quote character,
52633965Sjdp             with special handling for a backslash.  We try to
52733965Sjdp             optimize the copying in the simple case without using the
52833965Sjdp             GET and PUT macros.  */
52933965Sjdp	  {
53033965Sjdp	    char *s;
53133965Sjdp	    int len;
53233965Sjdp
53333965Sjdp	    for (s = from; s < fromend; s++)
53433965Sjdp	      {
53533965Sjdp		ch = *s;
53633965Sjdp		/* This condition must be changed if the type of any
53733965Sjdp                   other character can be LEX_IS_STRINGQUOTE.  */
53833965Sjdp		if (ch == '\\'
53933965Sjdp		    || ch == '"'
54033965Sjdp		    || ch == '\''
54133965Sjdp		    || ch == '\n')
54233965Sjdp		  break;
54333965Sjdp	      }
54433965Sjdp	    len = s - from;
54533965Sjdp	    if (len > toend - to)
54633965Sjdp	      len = toend - to;
54733965Sjdp	    if (len > 0)
54833965Sjdp	      {
54933965Sjdp		memcpy (to, from, len);
55033965Sjdp		to += len;
55133965Sjdp		from += len;
55233965Sjdp	      }
55333965Sjdp	  }
55433965Sjdp
55533965Sjdp	  ch = GET ();
55633965Sjdp	  if (ch == EOF)
55733965Sjdp	    {
55860484Sobrien	      as_warn (_("end of file in string: inserted '\"'"));
55933965Sjdp	      state = old_state;
56033965Sjdp	      UNGET ('\n');
56133965Sjdp	      PUT ('"');
56233965Sjdp	    }
56333965Sjdp	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
56433965Sjdp	    {
56533965Sjdp	      state = old_state;
56633965Sjdp	      PUT (ch);
56733965Sjdp	    }
56833965Sjdp#ifndef NO_STRING_ESCAPES
56933965Sjdp	  else if (ch == '\\')
57033965Sjdp	    {
57133965Sjdp	      state = 6;
57233965Sjdp	      PUT (ch);
57333965Sjdp	    }
57433965Sjdp#endif
57533965Sjdp	  else if (scrub_m68k_mri && ch == '\n')
57633965Sjdp	    {
57733965Sjdp	      /* Just quietly terminate the string.  This permits lines like
57833965Sjdp		   bne	label	loop if we haven't reach end yet
57933965Sjdp		 */
58033965Sjdp	      state = old_state;
58133965Sjdp	      UNGET (ch);
58233965Sjdp	      PUT ('\'');
58333965Sjdp	    }
58433965Sjdp	  else
58533965Sjdp	    {
58633965Sjdp	      PUT (ch);
58733965Sjdp	    }
58833965Sjdp	  continue;
58933965Sjdp
59033965Sjdp	case 6:
59133965Sjdp	  state = 5;
59233965Sjdp	  ch = GET ();
59333965Sjdp	  switch (ch)
59433965Sjdp	    {
59533965Sjdp	      /* Handle strings broken across lines, by turning '\n' into
59633965Sjdp		 '\\' and 'n'.  */
59733965Sjdp	    case '\n':
59833965Sjdp	      UNGET ('n');
59933965Sjdp	      add_newlines++;
60033965Sjdp	      PUT ('\\');
60133965Sjdp	      continue;
60233965Sjdp
60333965Sjdp	    case '"':
60433965Sjdp	    case '\\':
60533965Sjdp	    case 'b':
60633965Sjdp	    case 'f':
60733965Sjdp	    case 'n':
60833965Sjdp	    case 'r':
60933965Sjdp	    case 't':
61033965Sjdp	    case 'v':
61133965Sjdp	    case 'x':
61233965Sjdp	    case 'X':
61333965Sjdp	    case '0':
61433965Sjdp	    case '1':
61533965Sjdp	    case '2':
61633965Sjdp	    case '3':
61733965Sjdp	    case '4':
61833965Sjdp	    case '5':
61933965Sjdp	    case '6':
62033965Sjdp	    case '7':
62133965Sjdp	      break;
62233965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
62333965Sjdp	    default:
62460484Sobrien	      as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
62533965Sjdp	      break;
62633965Sjdp#else  /* ONLY_STANDARD_ESCAPES */
62733965Sjdp	    default:
62833965Sjdp	      /* Accept \x as x for any x */
62933965Sjdp	      break;
63033965Sjdp#endif /* ONLY_STANDARD_ESCAPES */
63133965Sjdp
63233965Sjdp	    case EOF:
63360484Sobrien	      as_warn (_("End of file in string: '\"' inserted"));
63433965Sjdp	      PUT ('"');
63533965Sjdp	      continue;
63633965Sjdp	    }
63733965Sjdp	  PUT (ch);
63833965Sjdp	  continue;
63933965Sjdp
64033965Sjdp	case 7:
64133965Sjdp	  ch = GET ();
64233965Sjdp	  state = 5;
64333965Sjdp	  old_state = 8;
64433965Sjdp	  if (ch == EOF)
64533965Sjdp	    goto fromeof;
64633965Sjdp	  PUT (ch);
64733965Sjdp	  continue;
64833965Sjdp
64933965Sjdp	case 8:
65033965Sjdp	  do
65133965Sjdp	    ch = GET ();
65233965Sjdp	  while (ch != '\n' && ch != EOF);
65333965Sjdp	  if (ch == EOF)
65433965Sjdp	    goto fromeof;
65533965Sjdp	  state = 0;
65633965Sjdp	  PUT (ch);
65733965Sjdp	  continue;
65833965Sjdp	}
65933965Sjdp
66033965Sjdp      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
66133965Sjdp
66233965Sjdp      /* flushchar: */
66333965Sjdp      ch = GET ();
66433965Sjdp
66533965Sjdp    recycle:
66633965Sjdp
66760484Sobrien#if defined TC_ARM && defined OBJ_ELF
66860484Sobrien      /* We need to watch out for .symver directives.  See the comment later
66960484Sobrien	 in this function.  */
67060484Sobrien      if (symver_state == NULL)
67160484Sobrien	{
67260484Sobrien	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
67360484Sobrien	    symver_state = symver_pseudo + 1;
67460484Sobrien	}
67560484Sobrien      else
67660484Sobrien	{
67760484Sobrien	  /* We advance to the next state if we find the right
67860484Sobrien	     character.  */
67960484Sobrien	  if (ch != '\0' && (*symver_state == ch))
68060484Sobrien	    ++symver_state;
68160484Sobrien	  else if (*symver_state != '\0')
68260484Sobrien	    /* We did not get the expected character, or we didn't
68360484Sobrien	       get a valid terminating character after seeing the
68460484Sobrien	       entire pseudo-op, so we must go back to the beginning.  */
68560484Sobrien	    symver_state = NULL;
68660484Sobrien	  else
68760484Sobrien	    {
68860484Sobrien	      /* We've read the entire pseudo-op.  If this is the end
68960484Sobrien		 of the line, go back to the beginning.  */
69060484Sobrien	      if (IS_NEWLINE (ch))
69160484Sobrien		symver_state = NULL;
69260484Sobrien	    }
69360484Sobrien	}
69460484Sobrien#endif /* TC_ARM && OBJ_ELF */
69560484Sobrien
69633965Sjdp#ifdef TC_M68K
69733965Sjdp      /* We want to have pseudo-ops which control whether we are in
69833965Sjdp         MRI mode or not.  Unfortunately, since m68k MRI mode affects
69933965Sjdp         the scrubber, that means that we need a special purpose
70033965Sjdp         recognizer here.  */
70133965Sjdp      if (mri_state == NULL)
70233965Sjdp	{
70333965Sjdp	  if ((state == 0 || state == 1)
70433965Sjdp	      && ch == mri_pseudo[0])
70533965Sjdp	    mri_state = mri_pseudo + 1;
70633965Sjdp	}
70733965Sjdp      else
70833965Sjdp	{
70933965Sjdp	  /* We advance to the next state if we find the right
71033965Sjdp	     character, or if we need a space character and we get any
71133965Sjdp	     whitespace character, or if we need a '0' and we get a
71233965Sjdp	     '1' (this is so that we only need one state to handle
71333965Sjdp	     ``.mri 0'' and ``.mri 1'').  */
71433965Sjdp	  if (ch != '\0'
71533965Sjdp	      && (*mri_state == ch
71633965Sjdp		  || (*mri_state == ' '
71733965Sjdp		      && lex[ch] == LEX_IS_WHITESPACE)
71833965Sjdp		  || (*mri_state == '0'
71933965Sjdp		      && ch == '1')))
72033965Sjdp	    {
72133965Sjdp	      mri_last_ch = ch;
72233965Sjdp	      ++mri_state;
72333965Sjdp	    }
72433965Sjdp	  else if (*mri_state != '\0'
72533965Sjdp		   || (lex[ch] != LEX_IS_WHITESPACE
72633965Sjdp		       && lex[ch] != LEX_IS_NEWLINE))
72733965Sjdp	    {
72833965Sjdp	      /* We did not get the expected character, or we didn't
72933965Sjdp		 get a valid terminating character after seeing the
73033965Sjdp		 entire pseudo-op, so we must go back to the
73133965Sjdp		 beginning.  */
73233965Sjdp	      mri_state = NULL;
73333965Sjdp	    }
73433965Sjdp	  else
73533965Sjdp	    {
73633965Sjdp	      /* We've read the entire pseudo-op.  mips_last_ch is
73733965Sjdp                 either '0' or '1' indicating whether to enter or
73833965Sjdp                 leave MRI mode.  */
73933965Sjdp	      do_scrub_begin (mri_last_ch == '1');
74038889Sjdp	      mri_state = NULL;
74133965Sjdp
74233965Sjdp	      /* We continue handling the character as usual.  The
74333965Sjdp                 main gas reader must also handle the .mri pseudo-op
74433965Sjdp                 to control expression parsing and the like.  */
74533965Sjdp	    }
74633965Sjdp	}
74733965Sjdp#endif
74833965Sjdp
74933965Sjdp      if (ch == EOF)
75033965Sjdp	{
75133965Sjdp	  if (state != 0)
75233965Sjdp	    {
75360484Sobrien	      as_warn (_("end of file not at end of a line; newline inserted"));
75433965Sjdp	      state = 0;
75533965Sjdp	      PUT ('\n');
75633965Sjdp	    }
75733965Sjdp	  goto fromeof;
75833965Sjdp	}
75933965Sjdp
76033965Sjdp      switch (lex[ch])
76133965Sjdp	{
76233965Sjdp	case LEX_IS_WHITESPACE:
76333965Sjdp	  do
76433965Sjdp	    {
76533965Sjdp	      ch = GET ();
76633965Sjdp	    }
76733965Sjdp	  while (ch != EOF && IS_WHITESPACE (ch));
76833965Sjdp	  if (ch == EOF)
76933965Sjdp	    goto fromeof;
77033965Sjdp
77133965Sjdp	  if (state == 0)
77233965Sjdp	    {
77333965Sjdp	      /* Preserve a single whitespace character at the
77433965Sjdp		 beginning of a line.  */
77533965Sjdp	      state = 1;
77633965Sjdp	      UNGET (ch);
77733965Sjdp	      PUT (' ');
77833965Sjdp	      break;
77933965Sjdp	    }
78033965Sjdp
78160484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
78277298Sobrien	  if (lex[ch] == LEX_IS_COLON)
78377298Sobrien	    {
78477298Sobrien	      /* Only keep this white if there's no white *after* the
78577298Sobrien                 colon.  */
78677298Sobrien	      ch2 = GET ();
78777298Sobrien	      UNGET (ch2);
78877298Sobrien	      if (!IS_WHITESPACE (ch2))
78977298Sobrien		{
79077298Sobrien		  state = 9;
79177298Sobrien		  UNGET (ch);
79277298Sobrien		  PUT (' ');
79377298Sobrien		  break;
79477298Sobrien		}
79577298Sobrien	    }
79660484Sobrien#endif
79733965Sjdp	  if (IS_COMMENT (ch)
79833965Sjdp	      || ch == '/'
79933965Sjdp	      || IS_LINE_SEPARATOR (ch))
80033965Sjdp	    {
80133965Sjdp	      if (scrub_m68k_mri)
80233965Sjdp		{
80333965Sjdp		  /* In MRI mode, we keep these spaces.  */
80433965Sjdp		  UNGET (ch);
80533965Sjdp		  PUT (' ');
80633965Sjdp		  break;
80733965Sjdp		}
80833965Sjdp	      goto recycle;
80933965Sjdp	    }
81033965Sjdp
81133965Sjdp	  /* If we're in state 2 or 11, we've seen a non-white
81233965Sjdp	     character followed by whitespace.  If the next character
81333965Sjdp	     is ':', this is whitespace after a label name which we
81433965Sjdp	     normally must ignore.  In MRI mode, though, spaces are
81533965Sjdp	     not permitted between the label and the colon.  */
81633965Sjdp	  if ((state == 2 || state == 11)
81733965Sjdp	      && lex[ch] == LEX_IS_COLON
81833965Sjdp	      && ! scrub_m68k_mri)
81933965Sjdp	    {
82033965Sjdp	      state = 1;
82133965Sjdp	      PUT (ch);
82233965Sjdp	      break;
82333965Sjdp	    }
82433965Sjdp
82533965Sjdp	  switch (state)
82633965Sjdp	    {
82733965Sjdp	    case 0:
82833965Sjdp	      state++;
82933965Sjdp	      goto recycle;	/* Punted leading sp */
83033965Sjdp	    case 1:
83133965Sjdp	      /* We can arrive here if we leave a leading whitespace
83233965Sjdp		 character at the beginning of a line.  */
83333965Sjdp	      goto recycle;
83433965Sjdp	    case 2:
83533965Sjdp	      state = 3;
83633965Sjdp	      if (to + 1 < toend)
83733965Sjdp		{
83833965Sjdp		  /* Optimize common case by skipping UNGET/GET.  */
83933965Sjdp		  PUT (' ');	/* Sp after opco */
84033965Sjdp		  goto recycle;
84133965Sjdp		}
84233965Sjdp	      UNGET (ch);
84333965Sjdp	      PUT (' ');
84433965Sjdp	      break;
84533965Sjdp	    case 3:
84633965Sjdp	      if (scrub_m68k_mri)
84733965Sjdp		{
84833965Sjdp		  /* In MRI mode, we keep these spaces.  */
84933965Sjdp		  UNGET (ch);
85033965Sjdp		  PUT (' ');
85133965Sjdp		  break;
85233965Sjdp		}
85333965Sjdp	      goto recycle;	/* Sp in operands */
85433965Sjdp	    case 9:
85533965Sjdp	    case 10:
85633965Sjdp	      if (scrub_m68k_mri)
85733965Sjdp		{
85833965Sjdp		  /* In MRI mode, we keep these spaces.  */
85933965Sjdp		  state = 3;
86033965Sjdp		  UNGET (ch);
86133965Sjdp		  PUT (' ');
86233965Sjdp		  break;
86333965Sjdp		}
86433965Sjdp	      state = 10;	/* Sp after symbol char */
86533965Sjdp	      goto recycle;
86633965Sjdp	    case 11:
86760484Sobrien	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
86833965Sjdp		state = 1;
86933965Sjdp	      else
87033965Sjdp		{
87133965Sjdp		  /* We know that ch is not ':', since we tested that
87233965Sjdp                     case above.  Therefore this is not a label, so it
87333965Sjdp                     must be the opcode, and we've just seen the
87433965Sjdp                     whitespace after it.  */
87533965Sjdp		  state = 3;
87633965Sjdp		}
87733965Sjdp	      UNGET (ch);
87833965Sjdp	      PUT (' ');	/* Sp after label definition.  */
87933965Sjdp	      break;
88033965Sjdp	    default:
88133965Sjdp	      BAD_CASE (state);
88233965Sjdp	    }
88333965Sjdp	  break;
88433965Sjdp
88533965Sjdp	case LEX_IS_TWOCHAR_COMMENT_1ST:
88633965Sjdp	  ch2 = GET ();
88733965Sjdp	  if (ch2 == '*')
88833965Sjdp	    {
88933965Sjdp	      for (;;)
89033965Sjdp		{
89133965Sjdp		  do
89233965Sjdp		    {
89333965Sjdp		      ch2 = GET ();
89433965Sjdp		      if (ch2 != EOF && IS_NEWLINE (ch2))
89533965Sjdp			add_newlines++;
89633965Sjdp		    }
89733965Sjdp		  while (ch2 != EOF && ch2 != '*');
89833965Sjdp
89933965Sjdp		  while (ch2 == '*')
90033965Sjdp		    ch2 = GET ();
90133965Sjdp
90233965Sjdp		  if (ch2 == EOF || ch2 == '/')
90333965Sjdp		    break;
90433965Sjdp
90533965Sjdp		  /* This UNGET will ensure that we count newlines
90633965Sjdp                     correctly.  */
90733965Sjdp		  UNGET (ch2);
90833965Sjdp		}
90933965Sjdp
91033965Sjdp	      if (ch2 == EOF)
91160484Sobrien		as_warn (_("end of file in multiline comment"));
91233965Sjdp
91333965Sjdp	      ch = ' ';
91433965Sjdp	      goto recycle;
91533965Sjdp	    }
91677298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS
91777298Sobrien	  else if (ch2 == '/')
91877298Sobrien	    {
91977298Sobrien	      do
92077298Sobrien		{
92177298Sobrien		  ch = GET ();
92277298Sobrien		}
92377298Sobrien	      while (ch != EOF && !IS_NEWLINE (ch));
92477298Sobrien	      if (ch == EOF)
92577298Sobrien		as_warn ("end of file in comment; newline inserted");
92677298Sobrien	      state = 0;
92777298Sobrien	      PUT ('\n');
92877298Sobrien	      break;
92977298Sobrien	    }
93077298Sobrien#endif
93133965Sjdp	  else
93233965Sjdp	    {
93333965Sjdp	      if (ch2 != EOF)
93433965Sjdp		UNGET (ch2);
93533965Sjdp	      if (state == 9 || state == 10)
93633965Sjdp		state = 3;
93733965Sjdp	      PUT (ch);
93833965Sjdp	    }
93933965Sjdp	  break;
94033965Sjdp
94133965Sjdp	case LEX_IS_STRINGQUOTE:
94233965Sjdp	  if (state == 10)
94333965Sjdp	    {
94433965Sjdp	      /* Preserve the whitespace in foo "bar" */
94533965Sjdp	      UNGET (ch);
94633965Sjdp	      state = 3;
94733965Sjdp	      PUT (' ');
94833965Sjdp
94933965Sjdp	      /* PUT didn't jump out.  We could just break, but we
95033965Sjdp                 know what will happen, so optimize a bit.  */
95133965Sjdp	      ch = GET ();
95233965Sjdp	      old_state = 3;
95333965Sjdp	    }
95433965Sjdp	  else if (state == 9)
95533965Sjdp	    old_state = 3;
95633965Sjdp	  else
95733965Sjdp	    old_state = state;
95833965Sjdp	  state = 5;
95933965Sjdp	  PUT (ch);
96033965Sjdp	  break;
96133965Sjdp
96233965Sjdp#ifndef IEEE_STYLE
96333965Sjdp	case LEX_IS_ONECHAR_QUOTE:
96433965Sjdp	  if (state == 10)
96533965Sjdp	    {
96633965Sjdp	      /* Preserve the whitespace in foo 'b' */
96733965Sjdp	      UNGET (ch);
96833965Sjdp	      state = 3;
96933965Sjdp	      PUT (' ');
97033965Sjdp	      break;
97133965Sjdp	    }
97233965Sjdp	  ch = GET ();
97333965Sjdp	  if (ch == EOF)
97433965Sjdp	    {
97560484Sobrien	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
97633965Sjdp	      ch = 0;
97733965Sjdp	    }
97833965Sjdp	  if (ch == '\\')
97933965Sjdp	    {
98033965Sjdp	      ch = GET ();
98133965Sjdp	      if (ch == EOF)
98233965Sjdp		{
98360484Sobrien		  as_warn (_("end of file in escape character"));
98433965Sjdp		  ch = '\\';
98533965Sjdp		}
98633965Sjdp	      else
98733965Sjdp		ch = process_escape (ch);
98833965Sjdp	    }
98933965Sjdp	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
99033965Sjdp
99133965Sjdp	  /* None of these 'x constants for us.  We want 'x'.  */
99233965Sjdp	  if ((ch = GET ()) != '\'')
99333965Sjdp	    {
99433965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE
99560484Sobrien	      as_warn (_("Missing close quote: (assumed)"));
99633965Sjdp#else
99733965Sjdp	      if (ch != EOF)
99833965Sjdp		UNGET (ch);
99933965Sjdp#endif
100033965Sjdp	    }
100133965Sjdp	  if (strlen (out_buf) == 1)
100233965Sjdp	    {
100333965Sjdp	      PUT (out_buf[0]);
100433965Sjdp	      break;
100533965Sjdp	    }
100633965Sjdp	  if (state == 9)
100733965Sjdp	    old_state = 3;
100833965Sjdp	  else
100933965Sjdp	    old_state = state;
101033965Sjdp	  state = -1;
101133965Sjdp	  out_string = out_buf;
101233965Sjdp	  PUT (*out_string++);
101333965Sjdp	  break;
101433965Sjdp#endif
101533965Sjdp
101633965Sjdp	case LEX_IS_COLON:
101760484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
101877298Sobrien	  state = 9;
101960484Sobrien#else
102033965Sjdp	  if (state == 9 || state == 10)
102133965Sjdp	    state = 3;
102233965Sjdp	  else if (state != 3)
102333965Sjdp	    state = 1;
102460484Sobrien#endif
102533965Sjdp	  PUT (ch);
102633965Sjdp	  break;
102733965Sjdp
102833965Sjdp	case LEX_IS_NEWLINE:
102933965Sjdp	  /* Roll out a bunch of newlines from inside comments, etc.  */
103033965Sjdp	  if (add_newlines)
103133965Sjdp	    {
103233965Sjdp	      --add_newlines;
103333965Sjdp	      UNGET (ch);
103433965Sjdp	    }
103577298Sobrien	  /* Fall through.  */
103633965Sjdp
103733965Sjdp	case LEX_IS_LINE_SEPARATOR:
103833965Sjdp	  state = 0;
103933965Sjdp	  PUT (ch);
104033965Sjdp	  break;
104133965Sjdp
104238889Sjdp#ifdef TC_V850
104338889Sjdp	case LEX_IS_DOUBLEDASH_1ST:
104477298Sobrien	  ch2 = GET ();
104538889Sjdp	  if (ch2 != '-')
104638889Sjdp	    {
104738889Sjdp	      UNGET (ch2);
104838889Sjdp	      goto de_fault;
104938889Sjdp	    }
105077298Sobrien	  /* Read and skip to end of line.  */
105138889Sjdp	  do
105238889Sjdp	    {
105338889Sjdp	      ch = GET ();
105438889Sjdp	    }
105538889Sjdp	  while (ch != EOF && ch != '\n');
105638889Sjdp	  if (ch == EOF)
105738889Sjdp	    {
105860484Sobrien	      as_warn (_("end of file in comment; newline inserted"));
105938889Sjdp	    }
106038889Sjdp	  state = 0;
106138889Sjdp	  PUT ('\n');
106238889Sjdp	  break;
106377298Sobrien#endif
106460484Sobrien#ifdef DOUBLEBAR_PARALLEL
106538889Sjdp	case LEX_IS_DOUBLEBAR_1ST:
106677298Sobrien	  ch2 = GET ();
106738889Sjdp	  if (ch2 != '|')
106838889Sjdp	    {
106938889Sjdp	      UNGET (ch2);
107038889Sjdp	      goto de_fault;
107138889Sjdp	    }
107238889Sjdp	  /* Reset back to state 1 and pretend that we are parsing a line from
107338889Sjdp	     just after the first white space.  */
107438889Sjdp	  state = 1;
107538889Sjdp	  PUT ('|');
107638889Sjdp	  PUT ('|');
107738889Sjdp	  break;
107877298Sobrien#endif
107933965Sjdp	case LEX_IS_LINE_COMMENT_START:
108033965Sjdp	  /* FIXME-someday: The two character comment stuff was badly
108133965Sjdp	     thought out.  On i386, we want '/' as line comment start
108233965Sjdp	     AND we want C style comments.  hence this hack.  The
108333965Sjdp	     whole lexical process should be reworked.  xoxorich.  */
108433965Sjdp	  if (ch == '/')
108533965Sjdp	    {
108633965Sjdp	      ch2 = GET ();
108733965Sjdp	      if (ch2 == '*')
108833965Sjdp		{
108933965Sjdp		  old_state = 3;
109033965Sjdp		  state = -2;
109133965Sjdp		  break;
109233965Sjdp		}
109333965Sjdp	      else
109433965Sjdp		{
109533965Sjdp		  UNGET (ch2);
109633965Sjdp		}
109733965Sjdp	    } /* bad hack */
109833965Sjdp
109933965Sjdp	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
110033965Sjdp	    {
110133965Sjdp	      int startch;
110233965Sjdp
110333965Sjdp	      startch = ch;
110433965Sjdp
110533965Sjdp	      do
110633965Sjdp		{
110733965Sjdp		  ch = GET ();
110833965Sjdp		}
110933965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch));
111033965Sjdp	      if (ch == EOF)
111133965Sjdp		{
111260484Sobrien		  as_warn (_("end of file in comment; newline inserted"));
111333965Sjdp		  PUT ('\n');
111433965Sjdp		  break;
111533965Sjdp		}
111633965Sjdp	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
111733965Sjdp		{
111833965Sjdp		  /* Not a cpp line.  */
111933965Sjdp		  while (ch != EOF && !IS_NEWLINE (ch))
112033965Sjdp		    ch = GET ();
112133965Sjdp		  if (ch == EOF)
112260484Sobrien		    as_warn (_("EOF in Comment: Newline inserted"));
112333965Sjdp		  state = 0;
112433965Sjdp		  PUT ('\n');
112533965Sjdp		  break;
112633965Sjdp		}
112777298Sobrien	      /* Looks like `# 123 "filename"' from cpp.  */
112833965Sjdp	      UNGET (ch);
112933965Sjdp	      old_state = 4;
113033965Sjdp	      state = -1;
113133965Sjdp	      if (scrub_m68k_mri)
113233965Sjdp		out_string = "\tappline ";
113333965Sjdp	      else
113433965Sjdp		out_string = "\t.appline ";
113533965Sjdp	      PUT (*out_string++);
113633965Sjdp	      break;
113733965Sjdp	    }
113833965Sjdp
113938889Sjdp#ifdef TC_D10V
114038889Sjdp	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
114138889Sjdp	     Trap is the only short insn that has a first operand that is
114238889Sjdp	     neither register nor label.
114338889Sjdp	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
114477298Sobrien	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
114577298Sobrien	     already LEX_IS_LINE_COMMENT_START.  However, it is the
114677298Sobrien	     only character in line_comment_chars for d10v, hence we
114777298Sobrien	     can recognize it as such.  */
114838889Sjdp	  /* An alternative approach would be to reset the state to 1 when
114938889Sjdp	     we see '||', '<'- or '->', but that seems to be overkill.  */
115077298Sobrien	  if (state == 10)
115177298Sobrien	    PUT (' ');
115238889Sjdp#endif
115333965Sjdp	  /* We have a line comment character which is not at the
115433965Sjdp	     start of a line.  If this is also a normal comment
115533965Sjdp	     character, fall through.  Otherwise treat it as a default
115633965Sjdp	     character.  */
115733965Sjdp	  if (strchr (tc_comment_chars, ch) == NULL
115833965Sjdp	      && (! scrub_m68k_mri
115933965Sjdp		  || (ch != '!' && ch != '*')))
116033965Sjdp	    goto de_fault;
116133965Sjdp	  if (scrub_m68k_mri
116233965Sjdp	      && (ch == '!' || ch == '*' || ch == '#')
116333965Sjdp	      && state != 1
116433965Sjdp	      && state != 10)
116533965Sjdp	    goto de_fault;
116633965Sjdp	  /* Fall through.  */
116733965Sjdp	case LEX_IS_COMMENT_START:
116860484Sobrien#if defined TC_ARM && defined OBJ_ELF
116960484Sobrien	  /* On the ARM, `@' is the comment character.
117060484Sobrien	     Unfortunately this is also a special character in ELF .symver
117177298Sobrien	     directives (and .type, though we deal with those another way).
117277298Sobrien	     So we check if this line is such a directive, and treat
117377298Sobrien	     the character as default if so.  This is a hack.  */
117460484Sobrien	  if ((symver_state != NULL) && (*symver_state == 0))
117560484Sobrien	    goto de_fault;
117660484Sobrien#endif
117777298Sobrien#ifdef WARN_COMMENTS
117877298Sobrien	  if (!found_comment)
117977298Sobrien	    as_where (&found_comment_file, &found_comment);
118077298Sobrien#endif
118133965Sjdp	  do
118233965Sjdp	    {
118333965Sjdp	      ch = GET ();
118433965Sjdp	    }
118533965Sjdp	  while (ch != EOF && !IS_NEWLINE (ch));
118633965Sjdp	  if (ch == EOF)
118760484Sobrien	    as_warn (_("end of file in comment; newline inserted"));
118833965Sjdp	  state = 0;
118933965Sjdp	  PUT ('\n');
119033965Sjdp	  break;
119133965Sjdp
119233965Sjdp	case LEX_IS_SYMBOL_COMPONENT:
119333965Sjdp	  if (state == 10)
119433965Sjdp	    {
119533965Sjdp	      /* This is a symbol character following another symbol
119633965Sjdp		 character, with whitespace in between.  We skipped
119733965Sjdp		 the whitespace earlier, so output it now.  */
119833965Sjdp	      UNGET (ch);
119933965Sjdp	      state = 3;
120033965Sjdp	      PUT (' ');
120133965Sjdp	      break;
120233965Sjdp	    }
120333965Sjdp
120433965Sjdp	  if (state == 3)
120533965Sjdp	    state = 9;
120633965Sjdp
120733965Sjdp	  /* This is a common case.  Quickly copy CH and all the
120833965Sjdp             following symbol component or normal characters.  */
120960484Sobrien	  if (to + 1 < toend
121060484Sobrien	      && mri_state == NULL
121160484Sobrien#if defined TC_ARM && defined OBJ_ELF
121260484Sobrien	      && symver_state == NULL
121360484Sobrien#endif
121460484Sobrien	      )
121533965Sjdp	    {
121633965Sjdp	      char *s;
121733965Sjdp	      int len;
121833965Sjdp
121933965Sjdp	      for (s = from; s < fromend; s++)
122033965Sjdp		{
122133965Sjdp		  int type;
122233965Sjdp
122377298Sobrien		  ch2 = *(unsigned char *) s;
122433965Sjdp		  type = lex[ch2];
122533965Sjdp		  if (type != 0
122633965Sjdp		      && type != LEX_IS_SYMBOL_COMPONENT)
122733965Sjdp		    break;
122833965Sjdp		}
122933965Sjdp	      if (s > from)
123033965Sjdp		{
123133965Sjdp		  /* Handle the last character normally, for
123233965Sjdp                     simplicity.  */
123333965Sjdp		  --s;
123433965Sjdp		}
123533965Sjdp	      len = s - from;
123633965Sjdp	      if (len > (toend - to) - 1)
123733965Sjdp		len = (toend - to) - 1;
123833965Sjdp	      if (len > 0)
123933965Sjdp		{
124033965Sjdp		  PUT (ch);
124133965Sjdp		  if (len > 8)
124233965Sjdp		    {
124333965Sjdp		      memcpy (to, from, len);
124433965Sjdp		      to += len;
124533965Sjdp		      from += len;
124633965Sjdp		    }
124733965Sjdp		  else
124833965Sjdp		    {
124933965Sjdp		      switch (len)
125033965Sjdp			{
125133965Sjdp			case 8: *to++ = *from++;
125233965Sjdp			case 7: *to++ = *from++;
125333965Sjdp			case 6: *to++ = *from++;
125433965Sjdp			case 5: *to++ = *from++;
125533965Sjdp			case 4: *to++ = *from++;
125633965Sjdp			case 3: *to++ = *from++;
125733965Sjdp			case 2: *to++ = *from++;
125833965Sjdp			case 1: *to++ = *from++;
125933965Sjdp			}
126077298Sobrien		    }
126133965Sjdp		  ch = GET ();
126233965Sjdp		}
126333965Sjdp	    }
126433965Sjdp
126533965Sjdp	  /* Fall through.  */
126633965Sjdp	default:
126733965Sjdp	de_fault:
126833965Sjdp	  /* Some relatively `normal' character.  */
126933965Sjdp	  if (state == 0)
127033965Sjdp	    {
127133965Sjdp	      state = 11;	/* Now seeing label definition */
127233965Sjdp	    }
127333965Sjdp	  else if (state == 1)
127433965Sjdp	    {
127533965Sjdp	      state = 2;	/* Ditto */
127633965Sjdp	    }
127733965Sjdp	  else if (state == 9)
127833965Sjdp	    {
127933965Sjdp	      if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
128033965Sjdp		state = 3;
128133965Sjdp	    }
128233965Sjdp	  else if (state == 10)
128333965Sjdp	    {
128460484Sobrien	      if (ch == '\\')
128560484Sobrien		{
128660484Sobrien		  /* Special handling for backslash: a backslash may
128760484Sobrien		     be the beginning of a formal parameter (of a
128860484Sobrien		     macro) following another symbol character, with
128960484Sobrien		     whitespace in between.  If that is the case, we
129060484Sobrien		     output a space before the parameter.  Strictly
129160484Sobrien		     speaking, correct handling depends upon what the
129260484Sobrien		     macro parameter expands into; if the parameter
129360484Sobrien		     expands into something which does not start with
129460484Sobrien		     an operand character, then we don't want to keep
129560484Sobrien		     the space.  We don't have enough information to
129660484Sobrien		     make the right choice, so here we are making the
129760484Sobrien		     choice which is more likely to be correct.  */
129860484Sobrien		  PUT (' ');
129960484Sobrien		}
130060484Sobrien
130133965Sjdp	      state = 3;
130233965Sjdp	    }
130333965Sjdp	  PUT (ch);
130433965Sjdp	  break;
130533965Sjdp	}
130633965Sjdp    }
130733965Sjdp
130833965Sjdp  /*NOTREACHED*/
130933965Sjdp
131033965Sjdp fromeof:
131133965Sjdp  /* We have reached the end of the input.  */
131233965Sjdp  return to - tostart;
131333965Sjdp
131433965Sjdp tofull:
131533965Sjdp  /* The output buffer is full.  Save any input we have not yet
131633965Sjdp     processed.  */
131733965Sjdp  if (fromend > from)
131833965Sjdp    {
131960484Sobrien      saved_input = from;
132033965Sjdp      saved_input_len = fromend - from;
132133965Sjdp    }
132233965Sjdp  else
132360484Sobrien    saved_input = NULL;
132460484Sobrien
132533965Sjdp  return to - tostart;
132633965Sjdp}
132733965Sjdp
132833965Sjdp/* end of app.c */
1329