app.c revision 89857
133965Sjdp/* This is the Assembler Pre-Processor
278828Sobrien   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
378828Sobrien   1999, 2000
433965Sjdp   Free Software Foundation, Inc.
533965Sjdp
633965Sjdp   This file is part of GAS, the GNU Assembler.
733965Sjdp
833965Sjdp   GAS is free software; you can redistribute it and/or modify
933965Sjdp   it under the terms of the GNU General Public License as published by
1033965Sjdp   the Free Software Foundation; either version 2, or (at your option)
1133965Sjdp   any later version.
1233965Sjdp
1333965Sjdp   GAS is distributed in the hope that it will be useful,
1433965Sjdp   but WITHOUT ANY WARRANTY; without even the implied warranty of
1533965Sjdp   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1633965Sjdp   GNU General Public License for more details.
1733965Sjdp
1833965Sjdp   You should have received a copy of the GNU General Public License
1933965Sjdp   along with GAS; see the file COPYING.  If not, write to the Free
2033965Sjdp   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2133965Sjdp   02111-1307, USA.  */
2233965Sjdp
2333965Sjdp/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
2433965Sjdp/* App, the assembler pre-processor.  This pre-processor strips out excess
2533965Sjdp   spaces, turns single-quoted characters into a decimal constant, and turns
2633965Sjdp   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
2733965Sjdp   pair.  This needs better error-handling.  */
2833965Sjdp
2933965Sjdp#include <stdio.h>
3033965Sjdp#include "as.h"			/* For BAD_CASE() only */
3133965Sjdp
3233965Sjdp#if (__STDC__ != 1)
3333965Sjdp#ifndef const
3433965Sjdp#define const  /* empty */
3533965Sjdp#endif
3633965Sjdp#endif
3733965Sjdp
#ifndef TC_M68K
/* Targets other than the m68k never scrub in MRI mode, so the flag
   collapses to a compile-time zero.  */
#define scrub_m68k_mri 0
#else
/* Nonzero while the scrubber is operating in m68k MRI mode.  This is
   kept separately from flag_m68k_mri because the .mri pseudo-op
   changes the two flags at different times.  */
static int scrub_m68k_mri;
#endif

/* Text of the pseudo-op that turns MRI mode on and off; the recognizer
   in do_scrub_chars steps through this string.  */
static const char mri_pseudo[] = ".mri 0";

#if defined (TC_ARM) && defined (OBJ_ELF)
/* Text of the pseudo-op on whose lines `@' characters need special
   treatment, together with the recognizer's current position in it
   (NULL when no match is in progress).  See do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";
static const char *symver_state;
#endif
5760484Sobrien
/* Classification of every possible input byte; each entry holds one of
   the LEX_IS_* codes below, or zero for "nothing special".  The table
   is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are always treated as symbol constituents.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Codes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11
#ifdef TC_V850
/* First '-' of a possible "--" sequence.  */
#define LEX_IS_DOUBLEDASH_1ST 12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
/* First '|' of a possible "||" sequence.  */
#define LEX_IS_DOUBLEBAR_1ST 13
#endif
#define LEX_IS_PARALLEL_SEPARATOR 14

/* Predicates on a character C.  C is used directly as an index into
   lex[], so it must lie in the range 0..255 (in particular, not EOF).  */
#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
8933965Sjdp
9033965Sjdpstatic int process_escape PARAMS ((int));
9133965Sjdp
9233965Sjdp/* FIXME-soon: The entire lexer/parser thingy should be
9333965Sjdp   built statically at compile time rather than dynamically
9477298Sobrien   each and every time the assembler is run.  xoxorich.  */
9533965Sjdp
9677298Sobrienvoid
9733965Sjdpdo_scrub_begin (m68k_mri)
9860484Sobrien     int m68k_mri ATTRIBUTE_UNUSED;
9933965Sjdp{
10033965Sjdp  const char *p;
10160484Sobrien  int c;
10233965Sjdp
10333965Sjdp  lex[' '] = LEX_IS_WHITESPACE;
10433965Sjdp  lex['\t'] = LEX_IS_WHITESPACE;
10538889Sjdp  lex['\r'] = LEX_IS_WHITESPACE;
10633965Sjdp  lex['\n'] = LEX_IS_NEWLINE;
10733965Sjdp  lex[':'] = LEX_IS_COLON;
10833965Sjdp
10960484Sobrien#ifdef TC_M68K
11060484Sobrien  scrub_m68k_mri = m68k_mri;
11160484Sobrien
11233965Sjdp  if (! m68k_mri)
11360484Sobrien#endif
11433965Sjdp    {
11533965Sjdp      lex['"'] = LEX_IS_STRINGQUOTE;
11633965Sjdp
11760484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370)
11860484Sobrien      /* I370 uses single-quotes to delimit integer, float constants */
11933965Sjdp      lex['\''] = LEX_IS_ONECHAR_QUOTE;
12033965Sjdp#endif
12133965Sjdp
12233965Sjdp#ifdef SINGLE_QUOTE_STRINGS
12333965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
12433965Sjdp#endif
12533965Sjdp    }
12633965Sjdp
12733965Sjdp  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
12833965Sjdp     in state 5 of do_scrub_chars must be changed.  */
12933965Sjdp
13033965Sjdp  /* Note that these override the previous defaults, e.g. if ';' is a
13133965Sjdp     comment char, then it isn't a line separator.  */
13233965Sjdp  for (p = symbol_chars; *p; ++p)
13333965Sjdp    {
13433965Sjdp      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
13533965Sjdp    }				/* declare symbol characters */
13633965Sjdp
13760484Sobrien  for (c = 128; c < 256; ++c)
13860484Sobrien    lex[c] = LEX_IS_SYMBOL_COMPONENT;
13960484Sobrien
14060484Sobrien#ifdef tc_symbol_chars
14160484Sobrien  /* This macro permits the processor to specify all characters which
14260484Sobrien     may appears in an operand.  This will prevent the scrubber from
14360484Sobrien     discarding meaningful whitespace in certain cases.  The i386
14460484Sobrien     backend uses this to support prefixes, which can confuse the
14560484Sobrien     scrubber as to whether it is parsing operands or opcodes.  */
14660484Sobrien  for (p = tc_symbol_chars; *p; ++p)
14760484Sobrien    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
14860484Sobrien#endif
14960484Sobrien
15033965Sjdp  /* The m68k backend wants to be able to change comment_chars.  */
15133965Sjdp#ifndef tc_comment_chars
15233965Sjdp#define tc_comment_chars comment_chars
15333965Sjdp#endif
15433965Sjdp  for (p = tc_comment_chars; *p; p++)
15533965Sjdp    {
15633965Sjdp      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
15733965Sjdp    }				/* declare comment chars */
15833965Sjdp
15933965Sjdp  for (p = line_comment_chars; *p; p++)
16033965Sjdp    {
16133965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
16233965Sjdp    }				/* declare line comment chars */
16333965Sjdp
16433965Sjdp  for (p = line_separator_chars; *p; p++)
16533965Sjdp    {
16633965Sjdp      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
16733965Sjdp    }				/* declare line separators */
16833965Sjdp
16989857Sobrien#ifdef tc_parallel_separator_chars
17089857Sobrien  /* This macro permits the processor to specify all characters which
17189857Sobrien     separate parallel insns on the same line.  */
17289857Sobrien  for (p = tc_parallel_separator_chars; *p; p++)
17389857Sobrien    {
17489857Sobrien      lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
17589857Sobrien    }				/* declare parallel separators */
17689857Sobrien#endif
17789857Sobrien
17833965Sjdp  /* Only allow slash-star comments if slash is not in use.
17933965Sjdp     FIXME: This isn't right.  We should always permit them.  */
18033965Sjdp  if (lex['/'] == 0)
18133965Sjdp    {
18233965Sjdp      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
18333965Sjdp    }
18433965Sjdp
18560484Sobrien#ifdef TC_M68K
18633965Sjdp  if (m68k_mri)
18733965Sjdp    {
18833965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
18933965Sjdp      lex[';'] = LEX_IS_COMMENT_START;
19033965Sjdp      lex['*'] = LEX_IS_LINE_COMMENT_START;
19133965Sjdp      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
19233965Sjdp         then it can't be used in an expression.  */
19333965Sjdp      lex['!'] = LEX_IS_LINE_COMMENT_START;
19433965Sjdp    }
19560484Sobrien#endif
19638889Sjdp
19738889Sjdp#ifdef TC_V850
19838889Sjdp  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
19938889Sjdp#endif
20060484Sobrien#ifdef DOUBLEBAR_PARALLEL
20138889Sjdp  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
20238889Sjdp#endif
20360484Sobrien#ifdef TC_D30V
20460484Sobrien  /* must do this is we want VLIW instruction with "->" or "<-" */
20560484Sobrien  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
20660484Sobrien#endif
20733965Sjdp}				/* do_scrub_begin() */
20833965Sjdp
/* State of the scrubber, kept in file-scope variables so that
   do_scrub_chars can return at any point and pick up where it left
   off on the next call.  */

/* State the scrubber is currently in.  */
static int state;

/* State to go back to once a temporary state (such as string or
   comment handling) has finished.  */
static int old_state;

/* Text still to be emitted while in state -1.  */
static char *out_string;

/* Small buffer saved and restored together with out_string.  */
static char out_buf[20];

/* Counter bumped each time a string is continued across a line with
   backslash-newline.  */
static int add_newlines;

/* Input left unconsumed by the previous call (NULL if none), and the
   number of bytes in it.  */
static char *saved_input;
static int saved_input_len;

/* Buffer that the GET callback fills with raw input.  */
static char input_buffer[32 * 1024];

/* Position reached in mri_pseudo by the .mri recognizer (NULL when no
   match is in progress), and the last character it matched.  */
static const char *mri_state;
static char mri_last_ch;
22033965Sjdp
22133965Sjdp/* Data structure for saving the state of app across #include's.  Note that
22233965Sjdp   app is called asynchronously to the parsing of the .include's, so our
22333965Sjdp   state at the time .include is interpreted is completely unrelated.
22433965Sjdp   That's why we have to save it all.  */
22533965Sjdp
22677298Sobrienstruct app_save {
22777298Sobrien  int          state;
22877298Sobrien  int          old_state;
22977298Sobrien  char *       out_string;
23077298Sobrien  char         out_buf[sizeof (out_buf)];
23177298Sobrien  int          add_newlines;
23277298Sobrien  char *       saved_input;
23377298Sobrien  int          saved_input_len;
23460484Sobrien#ifdef TC_M68K
23577298Sobrien  int          scrub_m68k_mri;
23660484Sobrien#endif
23777298Sobrien  const char * mri_state;
23877298Sobrien  char         mri_last_ch;
23960484Sobrien#if defined TC_ARM && defined OBJ_ELF
24077298Sobrien  const char * symver_state;
24160484Sobrien#endif
24277298Sobrien};
24333965Sjdp
24433965Sjdpchar *
24533965Sjdpapp_push ()
24633965Sjdp{
24733965Sjdp  register struct app_save *saved;
24833965Sjdp
24933965Sjdp  saved = (struct app_save *) xmalloc (sizeof (*saved));
25033965Sjdp  saved->state = state;
25133965Sjdp  saved->old_state = old_state;
25233965Sjdp  saved->out_string = out_string;
25333965Sjdp  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
25433965Sjdp  saved->add_newlines = add_newlines;
25560484Sobrien  if (saved_input == NULL)
25660484Sobrien    saved->saved_input = NULL;
25760484Sobrien  else
25860484Sobrien    {
25960484Sobrien      saved->saved_input = xmalloc (saved_input_len);
26060484Sobrien      memcpy (saved->saved_input, saved_input, saved_input_len);
26160484Sobrien      saved->saved_input_len = saved_input_len;
26260484Sobrien    }
26360484Sobrien#ifdef TC_M68K
26433965Sjdp  saved->scrub_m68k_mri = scrub_m68k_mri;
26560484Sobrien#endif
26633965Sjdp  saved->mri_state = mri_state;
26733965Sjdp  saved->mri_last_ch = mri_last_ch;
26860484Sobrien#if defined TC_ARM && defined OBJ_ELF
26960484Sobrien  saved->symver_state = symver_state;
27060484Sobrien#endif
27133965Sjdp
27277298Sobrien  /* do_scrub_begin() is not useful, just wastes time.  */
27333965Sjdp
27433965Sjdp  state = 0;
27533965Sjdp  saved_input = NULL;
27633965Sjdp
27733965Sjdp  return (char *) saved;
27833965Sjdp}
27933965Sjdp
28077298Sobrienvoid
28133965Sjdpapp_pop (arg)
28233965Sjdp     char *arg;
28333965Sjdp{
28433965Sjdp  register struct app_save *saved = (struct app_save *) arg;
28533965Sjdp
28677298Sobrien  /* There is no do_scrub_end ().  */
28733965Sjdp  state = saved->state;
28833965Sjdp  old_state = saved->old_state;
28933965Sjdp  out_string = saved->out_string;
29033965Sjdp  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
29133965Sjdp  add_newlines = saved->add_newlines;
29260484Sobrien  if (saved->saved_input == NULL)
29360484Sobrien    saved_input = NULL;
29460484Sobrien  else
29560484Sobrien    {
29660484Sobrien      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
29760484Sobrien      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
29860484Sobrien      saved_input = input_buffer;
29960484Sobrien      saved_input_len = saved->saved_input_len;
30060484Sobrien      free (saved->saved_input);
30160484Sobrien    }
30260484Sobrien#ifdef TC_M68K
30333965Sjdp  scrub_m68k_mri = saved->scrub_m68k_mri;
30460484Sobrien#endif
30533965Sjdp  mri_state = saved->mri_state;
30633965Sjdp  mri_last_ch = saved->mri_last_ch;
30760484Sobrien#if defined TC_ARM && defined OBJ_ELF
30860484Sobrien  symver_state = saved->symver_state;
30960484Sobrien#endif
31033965Sjdp
31133965Sjdp  free (arg);
31233965Sjdp}				/* app_pop() */
31333965Sjdp
/* Translate CH, the character that followed a backslash, into the
   character the escape sequence stands for.  Characters without a
   special meaning are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  static const struct
  {
    int letter;
    int value;
  }
  escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' },
    { '\'', '\'' },
    { '"', '\"' }
  };
  unsigned int i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    {
      if (escapes[i].letter == ch)
	return escapes[i].value;
    }

  return ch;
}
34033965Sjdp
34133965Sjdp/* This function is called to process input characters.  The GET
34233965Sjdp   parameter is used to retrieve more input characters.  GET should
34333965Sjdp   set its parameter to point to a buffer, and return the length of
34433965Sjdp   the buffer; it should return 0 at end of file.  The scrubbed output
34533965Sjdp   characters are put into the buffer starting at TOSTART; the TOSTART
34633965Sjdp   buffer is TOLEN bytes in length.  The function returns the number
34733965Sjdp   of scrubbed characters put into TOSTART.  This will be TOLEN unless
34833965Sjdp   end of file was seen.  This function is arranged as a state
34933965Sjdp   machine, and saves its state so that it may return at any point.
35033965Sjdp   This is the way the old code used to work.  */
35133965Sjdp
35233965Sjdpint
35333965Sjdpdo_scrub_chars (get, tostart, tolen)
35460484Sobrien     int (*get) PARAMS ((char *, int));
35533965Sjdp     char *tostart;
35633965Sjdp     int tolen;
35733965Sjdp{
35833965Sjdp  char *to = tostart;
35933965Sjdp  char *toend = tostart + tolen;
36033965Sjdp  char *from;
36133965Sjdp  char *fromend;
36233965Sjdp  int fromlen;
36333965Sjdp  register int ch, ch2 = 0;
36433965Sjdp
  /*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .line, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a quoted string.
	  7: After putting out a .appfile, put out string.
	  8: After putting out a .appfile string, flush until newline.
	  9: After seeing symbol char in state 3 (keep 1white after symchar)
	 10: After seeing whitespace in state 9 (keep white before symchar)
	 11: After seeing a symbol character in state 0 (eg a label definition)
	 -1: output string in out_string and go to the state in old_state
	 -2: flush text until a '*' '/' is seen, then go to state old_state
#ifdef TC_V850
         12: After seeing a dash, looking for a second dash as a start of comment.
#endif
#ifdef DOUBLEBAR_PARALLEL
	 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
#endif
	  */
38633965Sjdp
38733965Sjdp  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
38833965Sjdp     constructs like ``.loc 1 20''.  This was turning into ``.loc
38933965Sjdp     120''.  States 9 and 10 ensure that a space is never dropped in
39089857Sobrien     between characters which could appear in an identifier.  Ian
39133965Sjdp     Taylor, ian@cygnus.com.
39233965Sjdp
39333965Sjdp     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
39433965Sjdp     correctly on the PA (and any other target where colons are optional).
39538889Sjdp     Jeff Law, law@cs.utah.edu.
39633965Sjdp
39738889Sjdp     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
39838889Sjdp     get squashed into "cmp r1,r2||trap#1", with the all important space
39938889Sjdp     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
40038889Sjdp
40133965Sjdp  /* This macro gets the next input character.  */
40233965Sjdp
40360484Sobrien#define GET()							\
40460484Sobrien  (from < fromend						\
40560484Sobrien   ? * (unsigned char *) (from++)				\
40660484Sobrien   : (saved_input = NULL,					\
40760484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer),	\
40860484Sobrien      from = input_buffer,					\
40960484Sobrien      fromend = from + fromlen,					\
41060484Sobrien      (fromlen == 0						\
41160484Sobrien       ? EOF							\
41260484Sobrien       : * (unsigned char *) (from++))))
41333965Sjdp
41433965Sjdp  /* This macro pushes a character back on the input stream.  */
41533965Sjdp
41633965Sjdp#define UNGET(uch) (*--from = (uch))
41733965Sjdp
41833965Sjdp  /* This macro puts a character into the output buffer.  If this
41933965Sjdp     character fills the output buffer, this macro jumps to the label
42033965Sjdp     TOFULL.  We use this rather ugly approach because we need to
42133965Sjdp     handle two different termination conditions: EOF on the input
42233965Sjdp     stream, and a full output buffer.  It would be simpler if we
42333965Sjdp     always read in the entire input stream before processing it, but
42433965Sjdp     I don't want to make such a significant change to the assembler's
42533965Sjdp     memory usage.  */
42633965Sjdp
42733965Sjdp#define PUT(pch)			\
42833965Sjdp  do					\
42933965Sjdp    {					\
43033965Sjdp      *to++ = (pch);			\
43133965Sjdp      if (to >= toend)			\
43233965Sjdp        goto tofull;			\
43333965Sjdp    }					\
43433965Sjdp  while (0)
43533965Sjdp
43633965Sjdp  if (saved_input != NULL)
43733965Sjdp    {
43833965Sjdp      from = saved_input;
43933965Sjdp      fromend = from + saved_input_len;
44033965Sjdp    }
44133965Sjdp  else
44233965Sjdp    {
44360484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer);
44433965Sjdp      if (fromlen == 0)
44533965Sjdp	return 0;
44660484Sobrien      from = input_buffer;
44733965Sjdp      fromend = from + fromlen;
44833965Sjdp    }
44933965Sjdp
45033965Sjdp  while (1)
45133965Sjdp    {
45233965Sjdp      /* The cases in this switch end with continue, in order to
45333965Sjdp         branch back to the top of this while loop and generate the
45433965Sjdp         next output character in the appropriate state.  */
45533965Sjdp      switch (state)
45633965Sjdp	{
45733965Sjdp	case -1:
45833965Sjdp	  ch = *out_string++;
45933965Sjdp	  if (*out_string == '\0')
46033965Sjdp	    {
46133965Sjdp	      state = old_state;
46233965Sjdp	      old_state = 3;
46333965Sjdp	    }
46433965Sjdp	  PUT (ch);
46533965Sjdp	  continue;
46633965Sjdp
46733965Sjdp	case -2:
46833965Sjdp	  for (;;)
46933965Sjdp	    {
47033965Sjdp	      do
47133965Sjdp		{
47233965Sjdp		  ch = GET ();
47333965Sjdp
47433965Sjdp		  if (ch == EOF)
47533965Sjdp		    {
47660484Sobrien		      as_warn (_("end of file in comment"));
47733965Sjdp		      goto fromeof;
47833965Sjdp		    }
47933965Sjdp
48033965Sjdp		  if (ch == '\n')
48133965Sjdp		    PUT ('\n');
48233965Sjdp		}
48333965Sjdp	      while (ch != '*');
48433965Sjdp
48533965Sjdp	      while ((ch = GET ()) == '*')
48633965Sjdp		;
48733965Sjdp
48833965Sjdp	      if (ch == EOF)
48933965Sjdp		{
49060484Sobrien		  as_warn (_("end of file in comment"));
49133965Sjdp		  goto fromeof;
49233965Sjdp		}
49333965Sjdp
49433965Sjdp	      if (ch == '/')
49533965Sjdp		break;
49633965Sjdp
49733965Sjdp	      UNGET (ch);
49833965Sjdp	    }
49933965Sjdp
50033965Sjdp	  state = old_state;
50133965Sjdp	  UNGET (' ');
50233965Sjdp	  continue;
50333965Sjdp
50433965Sjdp	case 4:
50533965Sjdp	  ch = GET ();
50633965Sjdp	  if (ch == EOF)
50733965Sjdp	    goto fromeof;
50833965Sjdp	  else if (ch >= '0' && ch <= '9')
50933965Sjdp	    PUT (ch);
51033965Sjdp	  else
51133965Sjdp	    {
51233965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch))
51333965Sjdp		ch = GET ();
51433965Sjdp	      if (ch == '"')
51533965Sjdp		{
51633965Sjdp		  UNGET (ch);
51733965Sjdp		  if (scrub_m68k_mri)
51833965Sjdp		    out_string = "\n\tappfile ";
51933965Sjdp		  else
52033965Sjdp		    out_string = "\n\t.appfile ";
52133965Sjdp		  old_state = 7;
52233965Sjdp		  state = -1;
52333965Sjdp		  PUT (*out_string++);
52433965Sjdp		}
52533965Sjdp	      else
52633965Sjdp		{
52733965Sjdp		  while (ch != EOF && ch != '\n')
52833965Sjdp		    ch = GET ();
52933965Sjdp		  state = 0;
53033965Sjdp		  PUT (ch);
53133965Sjdp		}
53233965Sjdp	    }
53333965Sjdp	  continue;
53433965Sjdp
53533965Sjdp	case 5:
53633965Sjdp	  /* We are going to copy everything up to a quote character,
53733965Sjdp             with special handling for a backslash.  We try to
53833965Sjdp             optimize the copying in the simple case without using the
53933965Sjdp             GET and PUT macros.  */
54033965Sjdp	  {
54133965Sjdp	    char *s;
54233965Sjdp	    int len;
54333965Sjdp
54433965Sjdp	    for (s = from; s < fromend; s++)
54533965Sjdp	      {
54633965Sjdp		ch = *s;
54733965Sjdp		/* This condition must be changed if the type of any
54833965Sjdp                   other character can be LEX_IS_STRINGQUOTE.  */
54933965Sjdp		if (ch == '\\'
55033965Sjdp		    || ch == '"'
55133965Sjdp		    || ch == '\''
55233965Sjdp		    || ch == '\n')
55333965Sjdp		  break;
55433965Sjdp	      }
55533965Sjdp	    len = s - from;
55633965Sjdp	    if (len > toend - to)
55733965Sjdp	      len = toend - to;
55833965Sjdp	    if (len > 0)
55933965Sjdp	      {
56033965Sjdp		memcpy (to, from, len);
56133965Sjdp		to += len;
56233965Sjdp		from += len;
56333965Sjdp	      }
56433965Sjdp	  }
56533965Sjdp
56633965Sjdp	  ch = GET ();
56733965Sjdp	  if (ch == EOF)
56833965Sjdp	    {
56989857Sobrien	      as_warn (_("end of file in string; inserted '\"'"));
57033965Sjdp	      state = old_state;
57133965Sjdp	      UNGET ('\n');
57233965Sjdp	      PUT ('"');
57333965Sjdp	    }
57433965Sjdp	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
57533965Sjdp	    {
57633965Sjdp	      state = old_state;
57733965Sjdp	      PUT (ch);
57833965Sjdp	    }
57933965Sjdp#ifndef NO_STRING_ESCAPES
58033965Sjdp	  else if (ch == '\\')
58133965Sjdp	    {
58233965Sjdp	      state = 6;
58333965Sjdp	      PUT (ch);
58433965Sjdp	    }
58533965Sjdp#endif
58633965Sjdp	  else if (scrub_m68k_mri && ch == '\n')
58733965Sjdp	    {
58833965Sjdp	      /* Just quietly terminate the string.  This permits lines like
58933965Sjdp		   bne	label	loop if we haven't reach end yet
59033965Sjdp		 */
59133965Sjdp	      state = old_state;
59233965Sjdp	      UNGET (ch);
59333965Sjdp	      PUT ('\'');
59433965Sjdp	    }
59533965Sjdp	  else
59633965Sjdp	    {
59733965Sjdp	      PUT (ch);
59833965Sjdp	    }
59933965Sjdp	  continue;
60033965Sjdp
60133965Sjdp	case 6:
60233965Sjdp	  state = 5;
60333965Sjdp	  ch = GET ();
60433965Sjdp	  switch (ch)
60533965Sjdp	    {
60633965Sjdp	      /* Handle strings broken across lines, by turning '\n' into
60733965Sjdp		 '\\' and 'n'.  */
60833965Sjdp	    case '\n':
60933965Sjdp	      UNGET ('n');
61033965Sjdp	      add_newlines++;
61133965Sjdp	      PUT ('\\');
61233965Sjdp	      continue;
61333965Sjdp
61433965Sjdp	    case '"':
61533965Sjdp	    case '\\':
61633965Sjdp	    case 'b':
61733965Sjdp	    case 'f':
61833965Sjdp	    case 'n':
61933965Sjdp	    case 'r':
62033965Sjdp	    case 't':
62133965Sjdp	    case 'v':
62233965Sjdp	    case 'x':
62333965Sjdp	    case 'X':
62433965Sjdp	    case '0':
62533965Sjdp	    case '1':
62633965Sjdp	    case '2':
62733965Sjdp	    case '3':
62833965Sjdp	    case '4':
62933965Sjdp	    case '5':
63033965Sjdp	    case '6':
63133965Sjdp	    case '7':
63233965Sjdp	      break;
63333965Sjdp#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
63433965Sjdp	    default:
63589857Sobrien	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
63633965Sjdp	      break;
63733965Sjdp#else  /* ONLY_STANDARD_ESCAPES */
63833965Sjdp	    default:
63933965Sjdp	      /* Accept \x as x for any x */
64033965Sjdp	      break;
64133965Sjdp#endif /* ONLY_STANDARD_ESCAPES */
64233965Sjdp
64333965Sjdp	    case EOF:
64489857Sobrien	      as_warn (_("end of file in string; '\"' inserted"));
64533965Sjdp	      PUT ('"');
64633965Sjdp	      continue;
64733965Sjdp	    }
64833965Sjdp	  PUT (ch);
64933965Sjdp	  continue;
65033965Sjdp
65133965Sjdp	case 7:
65233965Sjdp	  ch = GET ();
65333965Sjdp	  state = 5;
65433965Sjdp	  old_state = 8;
65533965Sjdp	  if (ch == EOF)
65633965Sjdp	    goto fromeof;
65733965Sjdp	  PUT (ch);
65833965Sjdp	  continue;
65933965Sjdp
66033965Sjdp	case 8:
66133965Sjdp	  do
66233965Sjdp	    ch = GET ();
66333965Sjdp	  while (ch != '\n' && ch != EOF);
66433965Sjdp	  if (ch == EOF)
66533965Sjdp	    goto fromeof;
66633965Sjdp	  state = 0;
66733965Sjdp	  PUT (ch);
66833965Sjdp	  continue;
66933965Sjdp	}
67033965Sjdp
67133965Sjdp      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
67233965Sjdp
67333965Sjdp      /* flushchar: */
67433965Sjdp      ch = GET ();
67533965Sjdp
67633965Sjdp    recycle:
67733965Sjdp
67860484Sobrien#if defined TC_ARM && defined OBJ_ELF
67960484Sobrien      /* We need to watch out for .symver directives.  See the comment later
68060484Sobrien	 in this function.  */
68160484Sobrien      if (symver_state == NULL)
68260484Sobrien	{
68360484Sobrien	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
68460484Sobrien	    symver_state = symver_pseudo + 1;
68560484Sobrien	}
68660484Sobrien      else
68760484Sobrien	{
68860484Sobrien	  /* We advance to the next state if we find the right
68960484Sobrien	     character.  */
69060484Sobrien	  if (ch != '\0' && (*symver_state == ch))
69160484Sobrien	    ++symver_state;
69260484Sobrien	  else if (*symver_state != '\0')
69360484Sobrien	    /* We did not get the expected character, or we didn't
69460484Sobrien	       get a valid terminating character after seeing the
69560484Sobrien	       entire pseudo-op, so we must go back to the beginning.  */
69660484Sobrien	    symver_state = NULL;
69760484Sobrien	  else
69860484Sobrien	    {
69960484Sobrien	      /* We've read the entire pseudo-op.  If this is the end
70060484Sobrien		 of the line, go back to the beginning.  */
70160484Sobrien	      if (IS_NEWLINE (ch))
70260484Sobrien		symver_state = NULL;
70360484Sobrien	    }
70460484Sobrien	}
70560484Sobrien#endif /* TC_ARM && OBJ_ELF */
70660484Sobrien
70733965Sjdp#ifdef TC_M68K
70833965Sjdp      /* We want to have pseudo-ops which control whether we are in
70933965Sjdp         MRI mode or not.  Unfortunately, since m68k MRI mode affects
71033965Sjdp         the scrubber, that means that we need a special purpose
71133965Sjdp         recognizer here.  */
71233965Sjdp      if (mri_state == NULL)
71333965Sjdp	{
71433965Sjdp	  if ((state == 0 || state == 1)
71533965Sjdp	      && ch == mri_pseudo[0])
71633965Sjdp	    mri_state = mri_pseudo + 1;
71733965Sjdp	}
71833965Sjdp      else
71933965Sjdp	{
72033965Sjdp	  /* We advance to the next state if we find the right
72133965Sjdp	     character, or if we need a space character and we get any
72233965Sjdp	     whitespace character, or if we need a '0' and we get a
72333965Sjdp	     '1' (this is so that we only need one state to handle
72433965Sjdp	     ``.mri 0'' and ``.mri 1'').  */
72533965Sjdp	  if (ch != '\0'
72633965Sjdp	      && (*mri_state == ch
72733965Sjdp		  || (*mri_state == ' '
72833965Sjdp		      && lex[ch] == LEX_IS_WHITESPACE)
72933965Sjdp		  || (*mri_state == '0'
73033965Sjdp		      && ch == '1')))
73133965Sjdp	    {
73233965Sjdp	      mri_last_ch = ch;
73333965Sjdp	      ++mri_state;
73433965Sjdp	    }
73533965Sjdp	  else if (*mri_state != '\0'
73633965Sjdp		   || (lex[ch] != LEX_IS_WHITESPACE
73733965Sjdp		       && lex[ch] != LEX_IS_NEWLINE))
73833965Sjdp	    {
73933965Sjdp	      /* We did not get the expected character, or we didn't
74033965Sjdp		 get a valid terminating character after seeing the
74133965Sjdp		 entire pseudo-op, so we must go back to the
74233965Sjdp		 beginning.  */
74333965Sjdp	      mri_state = NULL;
74433965Sjdp	    }
74533965Sjdp	  else
74633965Sjdp	    {
	      /* We've read the entire pseudo-op.  mri_last_ch is
                 either '0' or '1' indicating whether to enter or
                 leave MRI mode.  */
75033965Sjdp	      do_scrub_begin (mri_last_ch == '1');
75138889Sjdp	      mri_state = NULL;
75233965Sjdp
75333965Sjdp	      /* We continue handling the character as usual.  The
75433965Sjdp                 main gas reader must also handle the .mri pseudo-op
75533965Sjdp                 to control expression parsing and the like.  */
75633965Sjdp	    }
75733965Sjdp	}
75833965Sjdp#endif
75933965Sjdp
76033965Sjdp      if (ch == EOF)
76133965Sjdp	{
76233965Sjdp	  if (state != 0)
76333965Sjdp	    {
76460484Sobrien	      as_warn (_("end of file not at end of a line; newline inserted"));
76533965Sjdp	      state = 0;
76633965Sjdp	      PUT ('\n');
76733965Sjdp	    }
76833965Sjdp	  goto fromeof;
76933965Sjdp	}
77033965Sjdp
77133965Sjdp      switch (lex[ch])
77233965Sjdp	{
77333965Sjdp	case LEX_IS_WHITESPACE:
77433965Sjdp	  do
77533965Sjdp	    {
77633965Sjdp	      ch = GET ();
77733965Sjdp	    }
77833965Sjdp	  while (ch != EOF && IS_WHITESPACE (ch));
77933965Sjdp	  if (ch == EOF)
78033965Sjdp	    goto fromeof;
78133965Sjdp
78233965Sjdp	  if (state == 0)
78333965Sjdp	    {
78433965Sjdp	      /* Preserve a single whitespace character at the
78533965Sjdp		 beginning of a line.  */
78633965Sjdp	      state = 1;
78733965Sjdp	      UNGET (ch);
78833965Sjdp	      PUT (' ');
78933965Sjdp	      break;
79033965Sjdp	    }
79133965Sjdp
79260484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
79377298Sobrien	  if (lex[ch] == LEX_IS_COLON)
79477298Sobrien	    {
79577298Sobrien	      /* Only keep this white if there's no white *after* the
79677298Sobrien                 colon.  */
79777298Sobrien	      ch2 = GET ();
79877298Sobrien	      UNGET (ch2);
79977298Sobrien	      if (!IS_WHITESPACE (ch2))
80077298Sobrien		{
80177298Sobrien		  state = 9;
80277298Sobrien		  UNGET (ch);
80377298Sobrien		  PUT (' ');
80477298Sobrien		  break;
80577298Sobrien		}
80677298Sobrien	    }
80760484Sobrien#endif
80833965Sjdp	  if (IS_COMMENT (ch)
80933965Sjdp	      || ch == '/'
81089857Sobrien	      || IS_LINE_SEPARATOR (ch)
81189857Sobrien	      || IS_PARALLEL_SEPARATOR (ch))
81233965Sjdp	    {
81333965Sjdp	      if (scrub_m68k_mri)
81433965Sjdp		{
81533965Sjdp		  /* In MRI mode, we keep these spaces.  */
81633965Sjdp		  UNGET (ch);
81733965Sjdp		  PUT (' ');
81833965Sjdp		  break;
81933965Sjdp		}
82033965Sjdp	      goto recycle;
82133965Sjdp	    }
82233965Sjdp
82333965Sjdp	  /* If we're in state 2 or 11, we've seen a non-white
82433965Sjdp	     character followed by whitespace.  If the next character
82533965Sjdp	     is ':', this is whitespace after a label name which we
82633965Sjdp	     normally must ignore.  In MRI mode, though, spaces are
82733965Sjdp	     not permitted between the label and the colon.  */
82833965Sjdp	  if ((state == 2 || state == 11)
82933965Sjdp	      && lex[ch] == LEX_IS_COLON
83033965Sjdp	      && ! scrub_m68k_mri)
83133965Sjdp	    {
83233965Sjdp	      state = 1;
83333965Sjdp	      PUT (ch);
83433965Sjdp	      break;
83533965Sjdp	    }
83633965Sjdp
83733965Sjdp	  switch (state)
83833965Sjdp	    {
83933965Sjdp	    case 0:
84033965Sjdp	      state++;
84133965Sjdp	      goto recycle;	/* Punted leading sp */
84233965Sjdp	    case 1:
84333965Sjdp	      /* We can arrive here if we leave a leading whitespace
84433965Sjdp		 character at the beginning of a line.  */
84533965Sjdp	      goto recycle;
84633965Sjdp	    case 2:
84733965Sjdp	      state = 3;
84833965Sjdp	      if (to + 1 < toend)
84933965Sjdp		{
85033965Sjdp		  /* Optimize common case by skipping UNGET/GET.  */
85133965Sjdp		  PUT (' ');	/* Sp after opco */
85233965Sjdp		  goto recycle;
85333965Sjdp		}
85433965Sjdp	      UNGET (ch);
85533965Sjdp	      PUT (' ');
85633965Sjdp	      break;
85733965Sjdp	    case 3:
85833965Sjdp	      if (scrub_m68k_mri)
85933965Sjdp		{
86033965Sjdp		  /* In MRI mode, we keep these spaces.  */
86133965Sjdp		  UNGET (ch);
86233965Sjdp		  PUT (' ');
86333965Sjdp		  break;
86433965Sjdp		}
86533965Sjdp	      goto recycle;	/* Sp in operands */
86633965Sjdp	    case 9:
86733965Sjdp	    case 10:
86833965Sjdp	      if (scrub_m68k_mri)
86933965Sjdp		{
87033965Sjdp		  /* In MRI mode, we keep these spaces.  */
87133965Sjdp		  state = 3;
87233965Sjdp		  UNGET (ch);
87333965Sjdp		  PUT (' ');
87433965Sjdp		  break;
87533965Sjdp		}
87633965Sjdp	      state = 10;	/* Sp after symbol char */
87733965Sjdp	      goto recycle;
87833965Sjdp	    case 11:
87960484Sobrien	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
88033965Sjdp		state = 1;
88133965Sjdp	      else
88233965Sjdp		{
88333965Sjdp		  /* We know that ch is not ':', since we tested that
88433965Sjdp                     case above.  Therefore this is not a label, so it
88533965Sjdp                     must be the opcode, and we've just seen the
88633965Sjdp                     whitespace after it.  */
88733965Sjdp		  state = 3;
88833965Sjdp		}
88933965Sjdp	      UNGET (ch);
89033965Sjdp	      PUT (' ');	/* Sp after label definition.  */
89133965Sjdp	      break;
89233965Sjdp	    default:
89333965Sjdp	      BAD_CASE (state);
89433965Sjdp	    }
89533965Sjdp	  break;
89633965Sjdp
89733965Sjdp	case LEX_IS_TWOCHAR_COMMENT_1ST:
89833965Sjdp	  ch2 = GET ();
89933965Sjdp	  if (ch2 == '*')
90033965Sjdp	    {
90133965Sjdp	      for (;;)
90233965Sjdp		{
90333965Sjdp		  do
90433965Sjdp		    {
90533965Sjdp		      ch2 = GET ();
90633965Sjdp		      if (ch2 != EOF && IS_NEWLINE (ch2))
90733965Sjdp			add_newlines++;
90833965Sjdp		    }
90933965Sjdp		  while (ch2 != EOF && ch2 != '*');
91033965Sjdp
91133965Sjdp		  while (ch2 == '*')
91233965Sjdp		    ch2 = GET ();
91333965Sjdp
91433965Sjdp		  if (ch2 == EOF || ch2 == '/')
91533965Sjdp		    break;
91633965Sjdp
91733965Sjdp		  /* This UNGET will ensure that we count newlines
91833965Sjdp                     correctly.  */
91933965Sjdp		  UNGET (ch2);
92033965Sjdp		}
92133965Sjdp
92233965Sjdp	      if (ch2 == EOF)
92360484Sobrien		as_warn (_("end of file in multiline comment"));
92433965Sjdp
92533965Sjdp	      ch = ' ';
92633965Sjdp	      goto recycle;
92733965Sjdp	    }
92877298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS
92977298Sobrien	  else if (ch2 == '/')
93077298Sobrien	    {
93177298Sobrien	      do
93277298Sobrien		{
93377298Sobrien		  ch = GET ();
93477298Sobrien		}
93577298Sobrien	      while (ch != EOF && !IS_NEWLINE (ch));
93677298Sobrien	      if (ch == EOF)
93777298Sobrien		as_warn ("end of file in comment; newline inserted");
93877298Sobrien	      state = 0;
93977298Sobrien	      PUT ('\n');
94077298Sobrien	      break;
94177298Sobrien	    }
94277298Sobrien#endif
94333965Sjdp	  else
94433965Sjdp	    {
94533965Sjdp	      if (ch2 != EOF)
94633965Sjdp		UNGET (ch2);
94733965Sjdp	      if (state == 9 || state == 10)
94833965Sjdp		state = 3;
94933965Sjdp	      PUT (ch);
95033965Sjdp	    }
95133965Sjdp	  break;
95233965Sjdp
95333965Sjdp	case LEX_IS_STRINGQUOTE:
95433965Sjdp	  if (state == 10)
95533965Sjdp	    {
95633965Sjdp	      /* Preserve the whitespace in foo "bar" */
95733965Sjdp	      UNGET (ch);
95833965Sjdp	      state = 3;
95933965Sjdp	      PUT (' ');
96033965Sjdp
96133965Sjdp	      /* PUT didn't jump out.  We could just break, but we
96233965Sjdp                 know what will happen, so optimize a bit.  */
96333965Sjdp	      ch = GET ();
96433965Sjdp	      old_state = 3;
96533965Sjdp	    }
96633965Sjdp	  else if (state == 9)
96733965Sjdp	    old_state = 3;
96833965Sjdp	  else
96933965Sjdp	    old_state = state;
97033965Sjdp	  state = 5;
97133965Sjdp	  PUT (ch);
97233965Sjdp	  break;
97333965Sjdp
97433965Sjdp#ifndef IEEE_STYLE
97533965Sjdp	case LEX_IS_ONECHAR_QUOTE:
97633965Sjdp	  if (state == 10)
97733965Sjdp	    {
97833965Sjdp	      /* Preserve the whitespace in foo 'b' */
97933965Sjdp	      UNGET (ch);
98033965Sjdp	      state = 3;
98133965Sjdp	      PUT (' ');
98233965Sjdp	      break;
98333965Sjdp	    }
98433965Sjdp	  ch = GET ();
98533965Sjdp	  if (ch == EOF)
98633965Sjdp	    {
98760484Sobrien	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
98833965Sjdp	      ch = 0;
98933965Sjdp	    }
99033965Sjdp	  if (ch == '\\')
99133965Sjdp	    {
99233965Sjdp	      ch = GET ();
99333965Sjdp	      if (ch == EOF)
99433965Sjdp		{
99560484Sobrien		  as_warn (_("end of file in escape character"));
99633965Sjdp		  ch = '\\';
99733965Sjdp		}
99833965Sjdp	      else
99933965Sjdp		ch = process_escape (ch);
100033965Sjdp	    }
100133965Sjdp	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
100233965Sjdp
100333965Sjdp	  /* None of these 'x constants for us.  We want 'x'.  */
100433965Sjdp	  if ((ch = GET ()) != '\'')
100533965Sjdp	    {
100633965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE
100789857Sobrien	      as_warn (_("missing close quote; (assumed)"));
100833965Sjdp#else
100933965Sjdp	      if (ch != EOF)
101033965Sjdp		UNGET (ch);
101133965Sjdp#endif
101233965Sjdp	    }
101333965Sjdp	  if (strlen (out_buf) == 1)
101433965Sjdp	    {
101533965Sjdp	      PUT (out_buf[0]);
101633965Sjdp	      break;
101733965Sjdp	    }
101833965Sjdp	  if (state == 9)
101933965Sjdp	    old_state = 3;
102033965Sjdp	  else
102133965Sjdp	    old_state = state;
102233965Sjdp	  state = -1;
102333965Sjdp	  out_string = out_buf;
102433965Sjdp	  PUT (*out_string++);
102533965Sjdp	  break;
102633965Sjdp#endif
102733965Sjdp
102833965Sjdp	case LEX_IS_COLON:
102960484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
103077298Sobrien	  state = 9;
103160484Sobrien#else
103233965Sjdp	  if (state == 9 || state == 10)
103333965Sjdp	    state = 3;
103433965Sjdp	  else if (state != 3)
103533965Sjdp	    state = 1;
103660484Sobrien#endif
103733965Sjdp	  PUT (ch);
103833965Sjdp	  break;
103933965Sjdp
104033965Sjdp	case LEX_IS_NEWLINE:
104133965Sjdp	  /* Roll out a bunch of newlines from inside comments, etc.  */
104233965Sjdp	  if (add_newlines)
104333965Sjdp	    {
104433965Sjdp	      --add_newlines;
104533965Sjdp	      UNGET (ch);
104633965Sjdp	    }
104777298Sobrien	  /* Fall through.  */
104833965Sjdp
104933965Sjdp	case LEX_IS_LINE_SEPARATOR:
105033965Sjdp	  state = 0;
105133965Sjdp	  PUT (ch);
105233965Sjdp	  break;
105333965Sjdp
105489857Sobrien	case LEX_IS_PARALLEL_SEPARATOR:
105589857Sobrien	  state = 1;
105689857Sobrien	  PUT (ch);
105789857Sobrien	  break;
105889857Sobrien
105938889Sjdp#ifdef TC_V850
106038889Sjdp	case LEX_IS_DOUBLEDASH_1ST:
106177298Sobrien	  ch2 = GET ();
106238889Sjdp	  if (ch2 != '-')
106338889Sjdp	    {
106438889Sjdp	      UNGET (ch2);
106538889Sjdp	      goto de_fault;
106638889Sjdp	    }
106777298Sobrien	  /* Read and skip to end of line.  */
106838889Sjdp	  do
106938889Sjdp	    {
107038889Sjdp	      ch = GET ();
107138889Sjdp	    }
107238889Sjdp	  while (ch != EOF && ch != '\n');
107338889Sjdp	  if (ch == EOF)
107438889Sjdp	    {
107560484Sobrien	      as_warn (_("end of file in comment; newline inserted"));
107638889Sjdp	    }
107738889Sjdp	  state = 0;
107838889Sjdp	  PUT ('\n');
107938889Sjdp	  break;
108077298Sobrien#endif
108160484Sobrien#ifdef DOUBLEBAR_PARALLEL
108238889Sjdp	case LEX_IS_DOUBLEBAR_1ST:
108377298Sobrien	  ch2 = GET ();
108438889Sjdp	  if (ch2 != '|')
108538889Sjdp	    {
108638889Sjdp	      UNGET (ch2);
108738889Sjdp	      goto de_fault;
108838889Sjdp	    }
108938889Sjdp	  /* Reset back to state 1 and pretend that we are parsing a line from
109038889Sjdp	     just after the first white space.  */
109138889Sjdp	  state = 1;
109238889Sjdp	  PUT ('|');
109338889Sjdp	  PUT ('|');
109438889Sjdp	  break;
109577298Sobrien#endif
109633965Sjdp	case LEX_IS_LINE_COMMENT_START:
109733965Sjdp	  /* FIXME-someday: The two character comment stuff was badly
109833965Sjdp	     thought out.  On i386, we want '/' as line comment start
109933965Sjdp	     AND we want C style comments.  hence this hack.  The
110033965Sjdp	     whole lexical process should be reworked.  xoxorich.  */
110133965Sjdp	  if (ch == '/')
110233965Sjdp	    {
110333965Sjdp	      ch2 = GET ();
110433965Sjdp	      if (ch2 == '*')
110533965Sjdp		{
110633965Sjdp		  old_state = 3;
110733965Sjdp		  state = -2;
110833965Sjdp		  break;
110933965Sjdp		}
111033965Sjdp	      else
111133965Sjdp		{
111233965Sjdp		  UNGET (ch2);
111333965Sjdp		}
111433965Sjdp	    } /* bad hack */
111533965Sjdp
111633965Sjdp	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
111733965Sjdp	    {
111833965Sjdp	      int startch;
111933965Sjdp
112033965Sjdp	      startch = ch;
112133965Sjdp
112233965Sjdp	      do
112333965Sjdp		{
112433965Sjdp		  ch = GET ();
112533965Sjdp		}
112633965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch));
112733965Sjdp	      if (ch == EOF)
112833965Sjdp		{
112960484Sobrien		  as_warn (_("end of file in comment; newline inserted"));
113033965Sjdp		  PUT ('\n');
113133965Sjdp		  break;
113233965Sjdp		}
113333965Sjdp	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
113433965Sjdp		{
113533965Sjdp		  /* Not a cpp line.  */
113633965Sjdp		  while (ch != EOF && !IS_NEWLINE (ch))
113733965Sjdp		    ch = GET ();
113833965Sjdp		  if (ch == EOF)
113989857Sobrien		    as_warn (_("end of file in comment; newline inserted"));
114033965Sjdp		  state = 0;
114133965Sjdp		  PUT ('\n');
114233965Sjdp		  break;
114333965Sjdp		}
114477298Sobrien	      /* Looks like `# 123 "filename"' from cpp.  */
114533965Sjdp	      UNGET (ch);
114633965Sjdp	      old_state = 4;
114733965Sjdp	      state = -1;
114833965Sjdp	      if (scrub_m68k_mri)
114933965Sjdp		out_string = "\tappline ";
115033965Sjdp	      else
115133965Sjdp		out_string = "\t.appline ";
115233965Sjdp	      PUT (*out_string++);
115333965Sjdp	      break;
115433965Sjdp	    }
115533965Sjdp
115638889Sjdp#ifdef TC_D10V
115738889Sjdp	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
115838889Sjdp	     Trap is the only short insn that has a first operand that is
115938889Sjdp	     neither register nor label.
116038889Sjdp	     We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
116177298Sobrien	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
116277298Sobrien	     already LEX_IS_LINE_COMMENT_START.  However, it is the
116377298Sobrien	     only character in line_comment_chars for d10v, hence we
116477298Sobrien	     can recognize it as such.  */
116538889Sjdp	  /* An alternative approach would be to reset the state to 1 when
116638889Sjdp	     we see '||', '<-' or '->', but that seems to be overkill.  */
116777298Sobrien	  if (state == 10)
116877298Sobrien	    PUT (' ');
116938889Sjdp#endif
117033965Sjdp	  /* We have a line comment character which is not at the
117133965Sjdp	     start of a line.  If this is also a normal comment
117233965Sjdp	     character, fall through.  Otherwise treat it as a default
117333965Sjdp	     character.  */
117433965Sjdp	  if (strchr (tc_comment_chars, ch) == NULL
117533965Sjdp	      && (! scrub_m68k_mri
117633965Sjdp		  || (ch != '!' && ch != '*')))
117733965Sjdp	    goto de_fault;
117833965Sjdp	  if (scrub_m68k_mri
117933965Sjdp	      && (ch == '!' || ch == '*' || ch == '#')
118033965Sjdp	      && state != 1
118133965Sjdp	      && state != 10)
118233965Sjdp	    goto de_fault;
118333965Sjdp	  /* Fall through.  */
118433965Sjdp	case LEX_IS_COMMENT_START:
118560484Sobrien#if defined TC_ARM && defined OBJ_ELF
118660484Sobrien	  /* On the ARM, `@' is the comment character.
118760484Sobrien	     Unfortunately this is also a special character in ELF .symver
118877298Sobrien	     directives (and .type, though we deal with those another way).
118977298Sobrien	     So we check if this line is such a directive, and treat
119077298Sobrien	     the character as default if so.  This is a hack.  */
119160484Sobrien	  if ((symver_state != NULL) && (*symver_state == 0))
119260484Sobrien	    goto de_fault;
119360484Sobrien#endif
119477298Sobrien#ifdef WARN_COMMENTS
119577298Sobrien	  if (!found_comment)
119677298Sobrien	    as_where (&found_comment_file, &found_comment);
119777298Sobrien#endif
119833965Sjdp	  do
119933965Sjdp	    {
120033965Sjdp	      ch = GET ();
120133965Sjdp	    }
120233965Sjdp	  while (ch != EOF && !IS_NEWLINE (ch));
120333965Sjdp	  if (ch == EOF)
120460484Sobrien	    as_warn (_("end of file in comment; newline inserted"));
120533965Sjdp	  state = 0;
120633965Sjdp	  PUT ('\n');
120733965Sjdp	  break;
120833965Sjdp
120933965Sjdp	case LEX_IS_SYMBOL_COMPONENT:
121033965Sjdp	  if (state == 10)
121133965Sjdp	    {
121233965Sjdp	      /* This is a symbol character following another symbol
121333965Sjdp		 character, with whitespace in between.  We skipped
121433965Sjdp		 the whitespace earlier, so output it now.  */
121533965Sjdp	      UNGET (ch);
121633965Sjdp	      state = 3;
121733965Sjdp	      PUT (' ');
121833965Sjdp	      break;
121933965Sjdp	    }
122033965Sjdp
122133965Sjdp	  if (state == 3)
122233965Sjdp	    state = 9;
122333965Sjdp
122433965Sjdp	  /* This is a common case.  Quickly copy CH and all the
122533965Sjdp             following symbol component or normal characters.  */
122660484Sobrien	  if (to + 1 < toend
122760484Sobrien	      && mri_state == NULL
122860484Sobrien#if defined TC_ARM && defined OBJ_ELF
122960484Sobrien	      && symver_state == NULL
123060484Sobrien#endif
123160484Sobrien	      )
123233965Sjdp	    {
123333965Sjdp	      char *s;
123433965Sjdp	      int len;
123533965Sjdp
123633965Sjdp	      for (s = from; s < fromend; s++)
123733965Sjdp		{
123833965Sjdp		  int type;
123933965Sjdp
124077298Sobrien		  ch2 = *(unsigned char *) s;
124133965Sjdp		  type = lex[ch2];
124233965Sjdp		  if (type != 0
124333965Sjdp		      && type != LEX_IS_SYMBOL_COMPONENT)
124433965Sjdp		    break;
124533965Sjdp		}
124633965Sjdp	      if (s > from)
124733965Sjdp		{
124833965Sjdp		  /* Handle the last character normally, for
124933965Sjdp                     simplicity.  */
125033965Sjdp		  --s;
125133965Sjdp		}
125233965Sjdp	      len = s - from;
125333965Sjdp	      if (len > (toend - to) - 1)
125433965Sjdp		len = (toend - to) - 1;
125533965Sjdp	      if (len > 0)
125633965Sjdp		{
125733965Sjdp		  PUT (ch);
125833965Sjdp		  if (len > 8)
125933965Sjdp		    {
126033965Sjdp		      memcpy (to, from, len);
126133965Sjdp		      to += len;
126233965Sjdp		      from += len;
126333965Sjdp		    }
126433965Sjdp		  else
126533965Sjdp		    {
126633965Sjdp		      switch (len)
126733965Sjdp			{
126833965Sjdp			case 8: *to++ = *from++;
126933965Sjdp			case 7: *to++ = *from++;
127033965Sjdp			case 6: *to++ = *from++;
127133965Sjdp			case 5: *to++ = *from++;
127233965Sjdp			case 4: *to++ = *from++;
127333965Sjdp			case 3: *to++ = *from++;
127433965Sjdp			case 2: *to++ = *from++;
127533965Sjdp			case 1: *to++ = *from++;
127633965Sjdp			}
127777298Sobrien		    }
127833965Sjdp		  ch = GET ();
127933965Sjdp		}
128033965Sjdp	    }
128133965Sjdp
128233965Sjdp	  /* Fall through.  */
128333965Sjdp	default:
128433965Sjdp	de_fault:
128533965Sjdp	  /* Some relatively `normal' character.  */
128633965Sjdp	  if (state == 0)
128733965Sjdp	    {
128833965Sjdp	      state = 11;	/* Now seeing label definition */
128933965Sjdp	    }
129033965Sjdp	  else if (state == 1)
129133965Sjdp	    {
129233965Sjdp	      state = 2;	/* Ditto */
129333965Sjdp	    }
129433965Sjdp	  else if (state == 9)
129533965Sjdp	    {
129633965Sjdp	      if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
129733965Sjdp		state = 3;
129833965Sjdp	    }
129933965Sjdp	  else if (state == 10)
130033965Sjdp	    {
130160484Sobrien	      if (ch == '\\')
130260484Sobrien		{
130360484Sobrien		  /* Special handling for backslash: a backslash may
130460484Sobrien		     be the beginning of a formal parameter (of a
130560484Sobrien		     macro) following another symbol character, with
130660484Sobrien		     whitespace in between.  If that is the case, we
130760484Sobrien		     output a space before the parameter.  Strictly
130860484Sobrien		     speaking, correct handling depends upon what the
130960484Sobrien		     macro parameter expands into; if the parameter
131060484Sobrien		     expands into something which does not start with
131160484Sobrien		     an operand character, then we don't want to keep
131260484Sobrien		     the space.  We don't have enough information to
131360484Sobrien		     make the right choice, so here we are making the
131460484Sobrien		     choice which is more likely to be correct.  */
131560484Sobrien		  PUT (' ');
131660484Sobrien		}
131760484Sobrien
131833965Sjdp	      state = 3;
131933965Sjdp	    }
132033965Sjdp	  PUT (ch);
132133965Sjdp	  break;
132233965Sjdp	}
132333965Sjdp    }
132433965Sjdp
132533965Sjdp  /*NOTREACHED*/
132633965Sjdp
132733965Sjdp fromeof:
132833965Sjdp  /* We have reached the end of the input.  */
132933965Sjdp  return to - tostart;
133033965Sjdp
133133965Sjdp tofull:
133233965Sjdp  /* The output buffer is full.  Save any input we have not yet
133333965Sjdp     processed.  */
133433965Sjdp  if (fromend > from)
133533965Sjdp    {
133660484Sobrien      saved_input = from;
133733965Sjdp      saved_input_len = fromend - from;
133833965Sjdp    }
133933965Sjdp  else
134060484Sobrien    saved_input = NULL;
134160484Sobrien
134233965Sjdp  return to - tostart;
134333965Sjdp}
134433965Sjdp
134533965Sjdp/* end of app.c */
1346