/* This is the Assembler Pre-Processor
   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2006, 2007
   Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
/* App, the assembler pre-processor.  This pre-processor strips out
   excess spaces, turns single-quoted characters into a decimal
   constant, and turns the # in # <number> <filename> <garbage> into a
   .linefile.  This needs better error-handling.  */

#include "as.h"

/* On compilers that do not claim strict ISO C conformance, make
   `const' vanish unless something has already defined it.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without m68k support MRI mode never applies, so tests of
   scrub_m68k_mri fold to a constant.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
static const char * symver_state;
#endif

/* Character classification table, indexed by character value.  Each
   entry holds one of the LEX_IS_* classes below; an entry that was
   never assigned keeps its static-initialisation value of 0.  The
   table is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are always classified as symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  The value 7 is not assigned.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14

/* Class predicates.  The argument indexes lex[] directly, so it must
   be a value in the range 0..255; in particular callers must rule out
   EOF before using any of these.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich.  */

9577298Sobrienvoid
96130561Sobriendo_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
9733965Sjdp{
9833965Sjdp  const char *p;
9960484Sobrien  int c;
10033965Sjdp
10133965Sjdp  lex[' '] = LEX_IS_WHITESPACE;
10233965Sjdp  lex['\t'] = LEX_IS_WHITESPACE;
10338889Sjdp  lex['\r'] = LEX_IS_WHITESPACE;
10433965Sjdp  lex['\n'] = LEX_IS_NEWLINE;
10533965Sjdp  lex[':'] = LEX_IS_COLON;
10633965Sjdp
10760484Sobrien#ifdef TC_M68K
10860484Sobrien  scrub_m68k_mri = m68k_mri;
10960484Sobrien
11033965Sjdp  if (! m68k_mri)
11160484Sobrien#endif
11233965Sjdp    {
11333965Sjdp      lex['"'] = LEX_IS_STRINGQUOTE;
11433965Sjdp
11560484Sobrien#if ! defined (TC_HPPA) && ! defined (TC_I370)
116130561Sobrien      /* I370 uses single-quotes to delimit integer, float constants.  */
11733965Sjdp      lex['\''] = LEX_IS_ONECHAR_QUOTE;
11833965Sjdp#endif
11933965Sjdp
12033965Sjdp#ifdef SINGLE_QUOTE_STRINGS
12133965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
12233965Sjdp#endif
12333965Sjdp    }
12433965Sjdp
12533965Sjdp  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
12633965Sjdp     in state 5 of do_scrub_chars must be changed.  */
12733965Sjdp
12833965Sjdp  /* Note that these override the previous defaults, e.g. if ';' is a
12933965Sjdp     comment char, then it isn't a line separator.  */
13033965Sjdp  for (p = symbol_chars; *p; ++p)
131130561Sobrien    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
13233965Sjdp
13360484Sobrien  for (c = 128; c < 256; ++c)
13460484Sobrien    lex[c] = LEX_IS_SYMBOL_COMPONENT;
13560484Sobrien
13660484Sobrien#ifdef tc_symbol_chars
13760484Sobrien  /* This macro permits the processor to specify all characters which
13860484Sobrien     may appears in an operand.  This will prevent the scrubber from
13960484Sobrien     discarding meaningful whitespace in certain cases.  The i386
14060484Sobrien     backend uses this to support prefixes, which can confuse the
14160484Sobrien     scrubber as to whether it is parsing operands or opcodes.  */
14260484Sobrien  for (p = tc_symbol_chars; *p; ++p)
14360484Sobrien    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
14460484Sobrien#endif
14560484Sobrien
14633965Sjdp  /* The m68k backend wants to be able to change comment_chars.  */
14733965Sjdp#ifndef tc_comment_chars
14833965Sjdp#define tc_comment_chars comment_chars
14933965Sjdp#endif
15033965Sjdp  for (p = tc_comment_chars; *p; p++)
151130561Sobrien    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
15233965Sjdp
15333965Sjdp  for (p = line_comment_chars; *p; p++)
154130561Sobrien    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
15533965Sjdp
15633965Sjdp  for (p = line_separator_chars; *p; p++)
157130561Sobrien    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
15833965Sjdp
15989857Sobrien#ifdef tc_parallel_separator_chars
16089857Sobrien  /* This macro permits the processor to specify all characters which
16189857Sobrien     separate parallel insns on the same line.  */
16289857Sobrien  for (p = tc_parallel_separator_chars; *p; p++)
163130561Sobrien    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
16489857Sobrien#endif
16589857Sobrien
16633965Sjdp  /* Only allow slash-star comments if slash is not in use.
16733965Sjdp     FIXME: This isn't right.  We should always permit them.  */
16833965Sjdp  if (lex['/'] == 0)
169130561Sobrien    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
17033965Sjdp
17160484Sobrien#ifdef TC_M68K
17233965Sjdp  if (m68k_mri)
17333965Sjdp    {
17433965Sjdp      lex['\''] = LEX_IS_STRINGQUOTE;
17533965Sjdp      lex[';'] = LEX_IS_COMMENT_START;
17633965Sjdp      lex['*'] = LEX_IS_LINE_COMMENT_START;
17733965Sjdp      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
178130561Sobrien	 then it can't be used in an expression.  */
17933965Sjdp      lex['!'] = LEX_IS_LINE_COMMENT_START;
18033965Sjdp    }
18160484Sobrien#endif
18238889Sjdp
18338889Sjdp#ifdef TC_V850
18438889Sjdp  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
18538889Sjdp#endif
18660484Sobrien#ifdef DOUBLEBAR_PARALLEL
18738889Sjdp  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
18838889Sjdp#endif
18960484Sobrien#ifdef TC_D30V
190130561Sobrien  /* Must do this is we want VLIW instruction with "->" or "<-".  */
19160484Sobrien  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
19260484Sobrien#endif
193130561Sobrien}
19433965Sjdp
/* Saved state of the scrubber.  do_scrub_chars may return at any
   point, so everything it needs in order to resume lives here.  */

/* Current state of the do_scrub_chars state machine; see the state
   table at the top of that function.  */
static int state;
/* State to return to once a temporary state (-1, -2, 5) finishes.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Small buffer saved and restored together with out_string.
   NOTE(review): presumably the storage out_string points into;
   confirm against the rest of do_scrub_chars.  */
static char out_buf[20];
/* Count of newlines swallowed from the input (e.g. a backslash-newline
   inside a string) that are still owed to the output.  */
static int add_newlines;
/* Unconsumed input carried over from the previous do_scrub_chars
   call, or NULL when there is none, and its length in bytes.  */
static char *saved_input;
static int saved_input_len;
/* Buffer the input callback reads raw characters into.  */
static char input_buffer[32 * 1024];
/* Progress through mri_pseudo while recognising a ".mri" pseudo-op,
   or NULL when no match is in progress.  */
static const char *mri_state;
/* Last character matched against mri_pseudo; after a complete match
   it is the '0' or '1' operand.  */
static char mri_last_ch;

20733965Sjdp/* Data structure for saving the state of app across #include's.  Note that
20833965Sjdp   app is called asynchronously to the parsing of the .include's, so our
20933965Sjdp   state at the time .include is interpreted is completely unrelated.
21033965Sjdp   That's why we have to save it all.  */
21133965Sjdp
212130561Sobrienstruct app_save
213130561Sobrien{
21477298Sobrien  int          state;
21577298Sobrien  int          old_state;
21677298Sobrien  char *       out_string;
21777298Sobrien  char         out_buf[sizeof (out_buf)];
21877298Sobrien  int          add_newlines;
21977298Sobrien  char *       saved_input;
22077298Sobrien  int          saved_input_len;
22160484Sobrien#ifdef TC_M68K
22277298Sobrien  int          scrub_m68k_mri;
22360484Sobrien#endif
22477298Sobrien  const char * mri_state;
22577298Sobrien  char         mri_last_ch;
22660484Sobrien#if defined TC_ARM && defined OBJ_ELF
22777298Sobrien  const char * symver_state;
22860484Sobrien#endif
22977298Sobrien};
23033965Sjdp
23133965Sjdpchar *
232130561Sobrienapp_push (void)
23333965Sjdp{
23433965Sjdp  register struct app_save *saved;
23533965Sjdp
23633965Sjdp  saved = (struct app_save *) xmalloc (sizeof (*saved));
23733965Sjdp  saved->state = state;
23833965Sjdp  saved->old_state = old_state;
23933965Sjdp  saved->out_string = out_string;
24033965Sjdp  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
24133965Sjdp  saved->add_newlines = add_newlines;
24260484Sobrien  if (saved_input == NULL)
24360484Sobrien    saved->saved_input = NULL;
24460484Sobrien  else
24560484Sobrien    {
24660484Sobrien      saved->saved_input = xmalloc (saved_input_len);
24760484Sobrien      memcpy (saved->saved_input, saved_input, saved_input_len);
24860484Sobrien      saved->saved_input_len = saved_input_len;
24960484Sobrien    }
25060484Sobrien#ifdef TC_M68K
25133965Sjdp  saved->scrub_m68k_mri = scrub_m68k_mri;
25260484Sobrien#endif
25333965Sjdp  saved->mri_state = mri_state;
25433965Sjdp  saved->mri_last_ch = mri_last_ch;
25560484Sobrien#if defined TC_ARM && defined OBJ_ELF
25660484Sobrien  saved->symver_state = symver_state;
25760484Sobrien#endif
25833965Sjdp
25977298Sobrien  /* do_scrub_begin() is not useful, just wastes time.  */
26033965Sjdp
26133965Sjdp  state = 0;
26233965Sjdp  saved_input = NULL;
26333965Sjdp
26433965Sjdp  return (char *) saved;
26533965Sjdp}
26633965Sjdp
26777298Sobrienvoid
268130561Sobrienapp_pop (char *arg)
26933965Sjdp{
27033965Sjdp  register struct app_save *saved = (struct app_save *) arg;
27133965Sjdp
27277298Sobrien  /* There is no do_scrub_end ().  */
27333965Sjdp  state = saved->state;
27433965Sjdp  old_state = saved->old_state;
27533965Sjdp  out_string = saved->out_string;
27633965Sjdp  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
27733965Sjdp  add_newlines = saved->add_newlines;
27860484Sobrien  if (saved->saved_input == NULL)
27960484Sobrien    saved_input = NULL;
28060484Sobrien  else
28160484Sobrien    {
28260484Sobrien      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
28360484Sobrien      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
28460484Sobrien      saved_input = input_buffer;
28560484Sobrien      saved_input_len = saved->saved_input_len;
28660484Sobrien      free (saved->saved_input);
28760484Sobrien    }
28860484Sobrien#ifdef TC_M68K
28933965Sjdp  scrub_m68k_mri = saved->scrub_m68k_mri;
29060484Sobrien#endif
29133965Sjdp  mri_state = saved->mri_state;
29233965Sjdp  mri_last_ch = saved->mri_last_ch;
29360484Sobrien#if defined TC_ARM && defined OBJ_ELF
29460484Sobrien  symver_state = saved->symver_state;
29560484Sobrien#endif
29633965Sjdp
29733965Sjdp  free (arg);
298130561Sobrien}
29933965Sjdp
/* Translate the character following a backslash into the character
   the escape sequence stands for.

   CH is the character after the backslash.  The letters b, f, n, r
   and t yield the corresponding control character; a single or double
   quote, and every other value, is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}

/* This function is called to process input characters.  The GET
   parameter is used to retrieve more input characters.  GET should
   set its parameter to point to a buffer, and return the length of
   the buffer; it should return 0 at end of file.  The scrubbed output
   characters are put into the buffer starting at TOSTART; the TOSTART
   buffer is TOLEN bytes in length.  The function returns the number
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
   end of file was seen.  This function is arranged as a state
   machine, and saves its state so that it may return at any point.
   This is the way the old code used to work.  */

33833965Sjdpint
339130561Sobriendo_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
34033965Sjdp{
34133965Sjdp  char *to = tostart;
34233965Sjdp  char *toend = tostart + tolen;
34333965Sjdp  char *from;
34433965Sjdp  char *fromend;
34533965Sjdp  int fromlen;
34633965Sjdp  register int ch, ch2 = 0;
347218822Sdim  /* Character that started the string we're working on.  */
348218822Sdim  static char quotechar;
34933965Sjdp
35033965Sjdp  /*State 0: beginning of normal line
35133965Sjdp	  1: After first whitespace on line (flush more white)
35233965Sjdp	  2: After first non-white (opcode) on line (keep 1white)
35333965Sjdp	  3: after second white on line (into operands) (flush white)
354218822Sdim	  4: after putting out a .linefile, put out digits
35533965Sjdp	  5: parsing a string, then go to old-state
35633965Sjdp	  6: putting out \ escape in a "d string.
357218822Sdim	  7: no longer used
358218822Sdim	  8: no longer used
35933965Sjdp	  9: After seeing symbol char in state 3 (keep 1white after symchar)
36033965Sjdp	 10: After seeing whitespace in state 9 (keep white before symchar)
36133965Sjdp	 11: After seeing a symbol character in state 0 (eg a label definition)
36233965Sjdp	 -1: output string in out_string and go to the state in old_state
36333965Sjdp	 -2: flush text until a '*' '/' is seen, then go to state old_state
36438889Sjdp#ifdef TC_V850
365130561Sobrien	 12: After seeing a dash, looking for a second dash as a start
366130561Sobrien	     of comment.
36738889Sjdp#endif
36860484Sobrien#ifdef DOUBLEBAR_PARALLEL
369130561Sobrien	 13: After seeing a vertical bar, looking for a second
370130561Sobrien	     vertical bar as a parallel expression separator.
37138889Sjdp#endif
372130561Sobrien#ifdef TC_IA64
373130561Sobrien	 14: After seeing a `(' at state 0, looking for a `)' as
374130561Sobrien	     predicate.
375130561Sobrien	 15: After seeing a `(' at state 1, looking for a `)' as
376130561Sobrien	     predicate.
377130561Sobrien#endif
378218822Sdim#ifdef TC_Z80
379218822Sdim	 16: After seeing an 'a' or an 'A' at the start of a symbol
380218822Sdim	 17: After seeing an 'f' or an 'F' in state 16
381218822Sdim#endif
38233965Sjdp	  */
38333965Sjdp
38433965Sjdp  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
38533965Sjdp     constructs like ``.loc 1 20''.  This was turning into ``.loc
38633965Sjdp     120''.  States 9 and 10 ensure that a space is never dropped in
38789857Sobrien     between characters which could appear in an identifier.  Ian
38833965Sjdp     Taylor, ian@cygnus.com.
38933965Sjdp
39033965Sjdp     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
39133965Sjdp     correctly on the PA (and any other target where colons are optional).
39238889Sjdp     Jeff Law, law@cs.utah.edu.
39333965Sjdp
39438889Sjdp     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
39538889Sjdp     get squashed into "cmp r1,r2||trap#1", with the all important space
39638889Sjdp     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
39738889Sjdp
39833965Sjdp  /* This macro gets the next input character.  */
39933965Sjdp
40060484Sobrien#define GET()							\
40160484Sobrien  (from < fromend						\
40260484Sobrien   ? * (unsigned char *) (from++)				\
40360484Sobrien   : (saved_input = NULL,					\
40460484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer),	\
40560484Sobrien      from = input_buffer,					\
40660484Sobrien      fromend = from + fromlen,					\
40760484Sobrien      (fromlen == 0						\
40860484Sobrien       ? EOF							\
40960484Sobrien       : * (unsigned char *) (from++))))
41033965Sjdp
41133965Sjdp  /* This macro pushes a character back on the input stream.  */
41233965Sjdp
41333965Sjdp#define UNGET(uch) (*--from = (uch))
41433965Sjdp
41533965Sjdp  /* This macro puts a character into the output buffer.  If this
41633965Sjdp     character fills the output buffer, this macro jumps to the label
41733965Sjdp     TOFULL.  We use this rather ugly approach because we need to
41833965Sjdp     handle two different termination conditions: EOF on the input
41933965Sjdp     stream, and a full output buffer.  It would be simpler if we
42033965Sjdp     always read in the entire input stream before processing it, but
42133965Sjdp     I don't want to make such a significant change to the assembler's
42233965Sjdp     memory usage.  */
42333965Sjdp
424104834Sobrien#define PUT(pch)				\
425104834Sobrien  do						\
426104834Sobrien    {						\
427104834Sobrien      *to++ = (pch);				\
428104834Sobrien      if (to >= toend)				\
429104834Sobrien	goto tofull;				\
430104834Sobrien    }						\
43133965Sjdp  while (0)
43233965Sjdp
43333965Sjdp  if (saved_input != NULL)
43433965Sjdp    {
43533965Sjdp      from = saved_input;
43633965Sjdp      fromend = from + saved_input_len;
43733965Sjdp    }
43833965Sjdp  else
43933965Sjdp    {
44060484Sobrien      fromlen = (*get) (input_buffer, sizeof input_buffer);
44133965Sjdp      if (fromlen == 0)
44233965Sjdp	return 0;
44360484Sobrien      from = input_buffer;
44433965Sjdp      fromend = from + fromlen;
44533965Sjdp    }
44633965Sjdp
44733965Sjdp  while (1)
44833965Sjdp    {
44933965Sjdp      /* The cases in this switch end with continue, in order to
450130561Sobrien	 branch back to the top of this while loop and generate the
451130561Sobrien	 next output character in the appropriate state.  */
45233965Sjdp      switch (state)
45333965Sjdp	{
45433965Sjdp	case -1:
45533965Sjdp	  ch = *out_string++;
45633965Sjdp	  if (*out_string == '\0')
45733965Sjdp	    {
45833965Sjdp	      state = old_state;
45933965Sjdp	      old_state = 3;
46033965Sjdp	    }
46133965Sjdp	  PUT (ch);
46233965Sjdp	  continue;
46333965Sjdp
46433965Sjdp	case -2:
46533965Sjdp	  for (;;)
46633965Sjdp	    {
46733965Sjdp	      do
46833965Sjdp		{
46933965Sjdp		  ch = GET ();
47033965Sjdp
47133965Sjdp		  if (ch == EOF)
47233965Sjdp		    {
47360484Sobrien		      as_warn (_("end of file in comment"));
47433965Sjdp		      goto fromeof;
47533965Sjdp		    }
47633965Sjdp
47733965Sjdp		  if (ch == '\n')
47833965Sjdp		    PUT ('\n');
47933965Sjdp		}
48033965Sjdp	      while (ch != '*');
48133965Sjdp
48233965Sjdp	      while ((ch = GET ()) == '*')
48333965Sjdp		;
48433965Sjdp
48533965Sjdp	      if (ch == EOF)
48633965Sjdp		{
48760484Sobrien		  as_warn (_("end of file in comment"));
48833965Sjdp		  goto fromeof;
48933965Sjdp		}
49033965Sjdp
49133965Sjdp	      if (ch == '/')
49233965Sjdp		break;
49333965Sjdp
49433965Sjdp	      UNGET (ch);
49533965Sjdp	    }
49633965Sjdp
49733965Sjdp	  state = old_state;
49833965Sjdp	  UNGET (' ');
49933965Sjdp	  continue;
50033965Sjdp
50133965Sjdp	case 4:
50233965Sjdp	  ch = GET ();
50333965Sjdp	  if (ch == EOF)
50433965Sjdp	    goto fromeof;
50533965Sjdp	  else if (ch >= '0' && ch <= '9')
50633965Sjdp	    PUT (ch);
50733965Sjdp	  else
50833965Sjdp	    {
50933965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch))
51033965Sjdp		ch = GET ();
51133965Sjdp	      if (ch == '"')
51233965Sjdp		{
513218822Sdim		  quotechar = ch;
514218822Sdim		  state = 5;
515218822Sdim		  old_state = 3;
516218822Sdim		  PUT (ch);
51733965Sjdp		}
51833965Sjdp	      else
51933965Sjdp		{
52033965Sjdp		  while (ch != EOF && ch != '\n')
52133965Sjdp		    ch = GET ();
52233965Sjdp		  state = 0;
52333965Sjdp		  PUT (ch);
52433965Sjdp		}
52533965Sjdp	    }
52633965Sjdp	  continue;
52733965Sjdp
52833965Sjdp	case 5:
52933965Sjdp	  /* We are going to copy everything up to a quote character,
530130561Sobrien	     with special handling for a backslash.  We try to
531130561Sobrien	     optimize the copying in the simple case without using the
532130561Sobrien	     GET and PUT macros.  */
53333965Sjdp	  {
53433965Sjdp	    char *s;
53533965Sjdp	    int len;
53633965Sjdp
53733965Sjdp	    for (s = from; s < fromend; s++)
53833965Sjdp	      {
53933965Sjdp		ch = *s;
54033965Sjdp		if (ch == '\\'
541218822Sdim		    || ch == quotechar
54233965Sjdp		    || ch == '\n')
54333965Sjdp		  break;
54433965Sjdp	      }
54533965Sjdp	    len = s - from;
54633965Sjdp	    if (len > toend - to)
54733965Sjdp	      len = toend - to;
54833965Sjdp	    if (len > 0)
54933965Sjdp	      {
55033965Sjdp		memcpy (to, from, len);
55133965Sjdp		to += len;
55233965Sjdp		from += len;
553218822Sdim		if (to >= toend)
554218822Sdim		  goto tofull;
55533965Sjdp	      }
55633965Sjdp	  }
55733965Sjdp
55833965Sjdp	  ch = GET ();
55933965Sjdp	  if (ch == EOF)
56033965Sjdp	    {
561218822Sdim	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
56233965Sjdp	      state = old_state;
56333965Sjdp	      UNGET ('\n');
564218822Sdim	      PUT (quotechar);
56533965Sjdp	    }
566218822Sdim	  else if (ch == quotechar)
56733965Sjdp	    {
56833965Sjdp	      state = old_state;
56933965Sjdp	      PUT (ch);
57033965Sjdp	    }
57133965Sjdp#ifndef NO_STRING_ESCAPES
57233965Sjdp	  else if (ch == '\\')
57333965Sjdp	    {
57433965Sjdp	      state = 6;
57533965Sjdp	      PUT (ch);
57633965Sjdp	    }
57733965Sjdp#endif
57833965Sjdp	  else if (scrub_m68k_mri && ch == '\n')
57933965Sjdp	    {
58033965Sjdp	      /* Just quietly terminate the string.  This permits lines like
581130561Sobrien		   bne	label	loop if we haven't reach end yet.  */
58233965Sjdp	      state = old_state;
58333965Sjdp	      UNGET (ch);
58433965Sjdp	      PUT ('\'');
58533965Sjdp	    }
58633965Sjdp	  else
58733965Sjdp	    {
58833965Sjdp	      PUT (ch);
58933965Sjdp	    }
59033965Sjdp	  continue;
59133965Sjdp
59233965Sjdp	case 6:
59333965Sjdp	  state = 5;
59433965Sjdp	  ch = GET ();
59533965Sjdp	  switch (ch)
59633965Sjdp	    {
59733965Sjdp	      /* Handle strings broken across lines, by turning '\n' into
59833965Sjdp		 '\\' and 'n'.  */
59933965Sjdp	    case '\n':
60033965Sjdp	      UNGET ('n');
60133965Sjdp	      add_newlines++;
60233965Sjdp	      PUT ('\\');
60333965Sjdp	      continue;
60433965Sjdp
605130561Sobrien	    case EOF:
606218822Sdim	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
607218822Sdim	      PUT (quotechar);
608130561Sobrien	      continue;
609130561Sobrien
61033965Sjdp	    case '"':
61133965Sjdp	    case '\\':
61233965Sjdp	    case 'b':
61333965Sjdp	    case 'f':
61433965Sjdp	    case 'n':
61533965Sjdp	    case 'r':
61633965Sjdp	    case 't':
61733965Sjdp	    case 'v':
61833965Sjdp	    case 'x':
61933965Sjdp	    case 'X':
62033965Sjdp	    case '0':
62133965Sjdp	    case '1':
62233965Sjdp	    case '2':
62333965Sjdp	    case '3':
62433965Sjdp	    case '4':
62533965Sjdp	    case '5':
62633965Sjdp	    case '6':
62733965Sjdp	    case '7':
62833965Sjdp	      break;
629130561Sobrien
63033965Sjdp	    default:
631130561Sobrien#ifdef ONLY_STANDARD_ESCAPES
63289857Sobrien	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
633130561Sobrien#endif
63433965Sjdp	      break;
63533965Sjdp	    }
63633965Sjdp	  PUT (ch);
63733965Sjdp	  continue;
63833965Sjdp
639130561Sobrien#ifdef DOUBLEBAR_PARALLEL
640130561Sobrien	case 13:
641130561Sobrien	  ch = GET ();
642130561Sobrien	  if (ch != '|')
643130561Sobrien	    abort ();
644130561Sobrien
645130561Sobrien	  /* Reset back to state 1 and pretend that we are parsing a
646130561Sobrien	     line from just after the first white space.  */
647130561Sobrien	  state = 1;
648130561Sobrien	  PUT ('|');
649130561Sobrien	  continue;
650130561Sobrien#endif
651218822Sdim#ifdef TC_Z80
652218822Sdim	case 16:
653218822Sdim	  /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
654218822Sdim	  ch = GET ();
655218822Sdim	  if (ch == 'f' || ch == 'F')
656218822Sdim	    {
657218822Sdim	      state = 17;
658218822Sdim	      PUT (ch);
659218822Sdim	    }
660218822Sdim	  else
661218822Sdim	    {
662218822Sdim	      state = 9;
663218822Sdim	      break;
664218822Sdim	    }
665218822Sdim	case 17:
666218822Sdim	  /* We have seen "af" at the start of a symbol,
667218822Sdim	     a ' here is a part of that symbol.  */
668218822Sdim	  ch = GET ();
669218822Sdim	  state = 9;
670218822Sdim	  if (ch == '\'')
671218822Sdim	    /* Change to avoid warning about unclosed string.  */
672218822Sdim	    PUT ('`');
673218822Sdim	  else
674218822Sdim	    UNGET (ch);
675218822Sdim	  break;
676218822Sdim#endif
67733965Sjdp	}
67833965Sjdp
679130561Sobrien      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
68033965Sjdp
68133965Sjdp      /* flushchar: */
68233965Sjdp      ch = GET ();
68333965Sjdp
684130561Sobrien#ifdef TC_IA64
685130561Sobrien      if (ch == '(' && (state == 0 || state == 1))
686130561Sobrien	{
687130561Sobrien	  state += 14;
688130561Sobrien	  PUT (ch);
689130561Sobrien	  continue;
690130561Sobrien	}
691130561Sobrien      else if (state == 14 || state == 15)
692130561Sobrien	{
693130561Sobrien	  if (ch == ')')
694130561Sobrien	    {
695130561Sobrien	      state -= 14;
696130561Sobrien	      PUT (ch);
697130561Sobrien	      ch = GET ();
698130561Sobrien	    }
699130561Sobrien	  else
700130561Sobrien	    {
701130561Sobrien	      PUT (ch);
702130561Sobrien	      continue;
703130561Sobrien	    }
704130561Sobrien	}
705130561Sobrien#endif
706130561Sobrien
70733965Sjdp    recycle:
70833965Sjdp
70960484Sobrien#if defined TC_ARM && defined OBJ_ELF
71060484Sobrien      /* We need to watch out for .symver directives.  See the comment later
71160484Sobrien	 in this function.  */
71260484Sobrien      if (symver_state == NULL)
71360484Sobrien	{
71460484Sobrien	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
71560484Sobrien	    symver_state = symver_pseudo + 1;
71660484Sobrien	}
71760484Sobrien      else
71860484Sobrien	{
71960484Sobrien	  /* We advance to the next state if we find the right
72060484Sobrien	     character.  */
72160484Sobrien	  if (ch != '\0' && (*symver_state == ch))
72260484Sobrien	    ++symver_state;
72360484Sobrien	  else if (*symver_state != '\0')
72460484Sobrien	    /* We did not get the expected character, or we didn't
72560484Sobrien	       get a valid terminating character after seeing the
72660484Sobrien	       entire pseudo-op, so we must go back to the beginning.  */
72760484Sobrien	    symver_state = NULL;
72860484Sobrien	  else
72960484Sobrien	    {
73060484Sobrien	      /* We've read the entire pseudo-op.  If this is the end
73160484Sobrien		 of the line, go back to the beginning.  */
73260484Sobrien	      if (IS_NEWLINE (ch))
73360484Sobrien		symver_state = NULL;
73460484Sobrien	    }
73560484Sobrien	}
73660484Sobrien#endif /* TC_ARM && OBJ_ELF */
73760484Sobrien
73833965Sjdp#ifdef TC_M68K
73933965Sjdp      /* We want to have pseudo-ops which control whether we are in
740130561Sobrien	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
741130561Sobrien	 the scrubber, that means that we need a special purpose
742130561Sobrien	 recognizer here.  */
74333965Sjdp      if (mri_state == NULL)
74433965Sjdp	{
74533965Sjdp	  if ((state == 0 || state == 1)
74633965Sjdp	      && ch == mri_pseudo[0])
74733965Sjdp	    mri_state = mri_pseudo + 1;
74833965Sjdp	}
74933965Sjdp      else
75033965Sjdp	{
75133965Sjdp	  /* We advance to the next state if we find the right
75233965Sjdp	     character, or if we need a space character and we get any
75333965Sjdp	     whitespace character, or if we need a '0' and we get a
75433965Sjdp	     '1' (this is so that we only need one state to handle
75533965Sjdp	     ``.mri 0'' and ``.mri 1'').  */
75633965Sjdp	  if (ch != '\0'
75733965Sjdp	      && (*mri_state == ch
75833965Sjdp		  || (*mri_state == ' '
75933965Sjdp		      && lex[ch] == LEX_IS_WHITESPACE)
76033965Sjdp		  || (*mri_state == '0'
76133965Sjdp		      && ch == '1')))
76233965Sjdp	    {
76333965Sjdp	      mri_last_ch = ch;
76433965Sjdp	      ++mri_state;
76533965Sjdp	    }
76633965Sjdp	  else if (*mri_state != '\0'
76733965Sjdp		   || (lex[ch] != LEX_IS_WHITESPACE
76833965Sjdp		       && lex[ch] != LEX_IS_NEWLINE))
76933965Sjdp	    {
77033965Sjdp	      /* We did not get the expected character, or we didn't
77133965Sjdp		 get a valid terminating character after seeing the
77233965Sjdp		 entire pseudo-op, so we must go back to the
77333965Sjdp		 beginning.  */
77433965Sjdp	      mri_state = NULL;
77533965Sjdp	    }
77633965Sjdp	  else
77733965Sjdp	    {
77833965Sjdp	      /* We've read the entire pseudo-op.  mips_last_ch is
779130561Sobrien		 either '0' or '1' indicating whether to enter or
780130561Sobrien		 leave MRI mode.  */
78133965Sjdp	      do_scrub_begin (mri_last_ch == '1');
78238889Sjdp	      mri_state = NULL;
78333965Sjdp
78433965Sjdp	      /* We continue handling the character as usual.  The
785130561Sobrien		 main gas reader must also handle the .mri pseudo-op
786130561Sobrien		 to control expression parsing and the like.  */
78733965Sjdp	    }
78833965Sjdp	}
78933965Sjdp#endif
79033965Sjdp
79133965Sjdp      if (ch == EOF)
79233965Sjdp	{
79333965Sjdp	  if (state != 0)
79433965Sjdp	    {
79560484Sobrien	      as_warn (_("end of file not at end of a line; newline inserted"));
79633965Sjdp	      state = 0;
79733965Sjdp	      PUT ('\n');
79833965Sjdp	    }
79933965Sjdp	  goto fromeof;
80033965Sjdp	}
80133965Sjdp
80233965Sjdp      switch (lex[ch])
80333965Sjdp	{
80433965Sjdp	case LEX_IS_WHITESPACE:
80533965Sjdp	  do
80633965Sjdp	    {
80733965Sjdp	      ch = GET ();
80833965Sjdp	    }
80933965Sjdp	  while (ch != EOF && IS_WHITESPACE (ch));
81033965Sjdp	  if (ch == EOF)
81133965Sjdp	    goto fromeof;
81233965Sjdp
81333965Sjdp	  if (state == 0)
81433965Sjdp	    {
81533965Sjdp	      /* Preserve a single whitespace character at the
81633965Sjdp		 beginning of a line.  */
81733965Sjdp	      state = 1;
81833965Sjdp	      UNGET (ch);
81933965Sjdp	      PUT (' ');
82033965Sjdp	      break;
82133965Sjdp	    }
82233965Sjdp
82360484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
82477298Sobrien	  if (lex[ch] == LEX_IS_COLON)
82577298Sobrien	    {
82677298Sobrien	      /* Only keep this white if there's no white *after* the
827130561Sobrien		 colon.  */
82877298Sobrien	      ch2 = GET ();
82977298Sobrien	      UNGET (ch2);
83077298Sobrien	      if (!IS_WHITESPACE (ch2))
83177298Sobrien		{
83277298Sobrien		  state = 9;
83377298Sobrien		  UNGET (ch);
83477298Sobrien		  PUT (' ');
83577298Sobrien		  break;
83677298Sobrien		}
83777298Sobrien	    }
83860484Sobrien#endif
83933965Sjdp	  if (IS_COMMENT (ch)
84033965Sjdp	      || ch == '/'
84189857Sobrien	      || IS_LINE_SEPARATOR (ch)
84289857Sobrien	      || IS_PARALLEL_SEPARATOR (ch))
84333965Sjdp	    {
84433965Sjdp	      if (scrub_m68k_mri)
84533965Sjdp		{
84633965Sjdp		  /* In MRI mode, we keep these spaces.  */
84733965Sjdp		  UNGET (ch);
84833965Sjdp		  PUT (' ');
84933965Sjdp		  break;
85033965Sjdp		}
85133965Sjdp	      goto recycle;
85233965Sjdp	    }
85333965Sjdp
85433965Sjdp	  /* If we're in state 2 or 11, we've seen a non-white
85533965Sjdp	     character followed by whitespace.  If the next character
85633965Sjdp	     is ':', this is whitespace after a label name which we
85733965Sjdp	     normally must ignore.  In MRI mode, though, spaces are
85833965Sjdp	     not permitted between the label and the colon.  */
85933965Sjdp	  if ((state == 2 || state == 11)
86033965Sjdp	      && lex[ch] == LEX_IS_COLON
86133965Sjdp	      && ! scrub_m68k_mri)
86233965Sjdp	    {
86333965Sjdp	      state = 1;
86433965Sjdp	      PUT (ch);
86533965Sjdp	      break;
86633965Sjdp	    }
86733965Sjdp
86833965Sjdp	  switch (state)
86933965Sjdp	    {
87033965Sjdp	    case 1:
87133965Sjdp	      /* We can arrive here if we leave a leading whitespace
87233965Sjdp		 character at the beginning of a line.  */
87333965Sjdp	      goto recycle;
87433965Sjdp	    case 2:
87533965Sjdp	      state = 3;
87633965Sjdp	      if (to + 1 < toend)
87733965Sjdp		{
87833965Sjdp		  /* Optimize common case by skipping UNGET/GET.  */
87933965Sjdp		  PUT (' ');	/* Sp after opco */
88033965Sjdp		  goto recycle;
88133965Sjdp		}
88233965Sjdp	      UNGET (ch);
88333965Sjdp	      PUT (' ');
88433965Sjdp	      break;
88533965Sjdp	    case 3:
88633965Sjdp	      if (scrub_m68k_mri)
88733965Sjdp		{
88833965Sjdp		  /* In MRI mode, we keep these spaces.  */
88933965Sjdp		  UNGET (ch);
89033965Sjdp		  PUT (' ');
89133965Sjdp		  break;
89233965Sjdp		}
89333965Sjdp	      goto recycle;	/* Sp in operands */
89433965Sjdp	    case 9:
89533965Sjdp	    case 10:
89633965Sjdp	      if (scrub_m68k_mri)
89733965Sjdp		{
89833965Sjdp		  /* In MRI mode, we keep these spaces.  */
89933965Sjdp		  state = 3;
90033965Sjdp		  UNGET (ch);
90133965Sjdp		  PUT (' ');
90233965Sjdp		  break;
90333965Sjdp		}
90433965Sjdp	      state = 10;	/* Sp after symbol char */
90533965Sjdp	      goto recycle;
90633965Sjdp	    case 11:
90760484Sobrien	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
90833965Sjdp		state = 1;
90933965Sjdp	      else
91033965Sjdp		{
91133965Sjdp		  /* We know that ch is not ':', since we tested that
912130561Sobrien		     case above.  Therefore this is not a label, so it
913130561Sobrien		     must be the opcode, and we've just seen the
914130561Sobrien		     whitespace after it.  */
91533965Sjdp		  state = 3;
91633965Sjdp		}
91733965Sjdp	      UNGET (ch);
91833965Sjdp	      PUT (' ');	/* Sp after label definition.  */
91933965Sjdp	      break;
92033965Sjdp	    default:
92133965Sjdp	      BAD_CASE (state);
92233965Sjdp	    }
92333965Sjdp	  break;
92433965Sjdp
92533965Sjdp	case LEX_IS_TWOCHAR_COMMENT_1ST:
92633965Sjdp	  ch2 = GET ();
92733965Sjdp	  if (ch2 == '*')
92833965Sjdp	    {
92933965Sjdp	      for (;;)
93033965Sjdp		{
93133965Sjdp		  do
93233965Sjdp		    {
93333965Sjdp		      ch2 = GET ();
93433965Sjdp		      if (ch2 != EOF && IS_NEWLINE (ch2))
93533965Sjdp			add_newlines++;
93633965Sjdp		    }
93733965Sjdp		  while (ch2 != EOF && ch2 != '*');
93833965Sjdp
93933965Sjdp		  while (ch2 == '*')
94033965Sjdp		    ch2 = GET ();
94133965Sjdp
94233965Sjdp		  if (ch2 == EOF || ch2 == '/')
94333965Sjdp		    break;
94433965Sjdp
94533965Sjdp		  /* This UNGET will ensure that we count newlines
946130561Sobrien		     correctly.  */
94733965Sjdp		  UNGET (ch2);
94833965Sjdp		}
94933965Sjdp
95033965Sjdp	      if (ch2 == EOF)
95160484Sobrien		as_warn (_("end of file in multiline comment"));
95233965Sjdp
95333965Sjdp	      ch = ' ';
95433965Sjdp	      goto recycle;
95533965Sjdp	    }
95677298Sobrien#ifdef DOUBLESLASH_LINE_COMMENTS
95777298Sobrien	  else if (ch2 == '/')
95877298Sobrien	    {
95977298Sobrien	      do
96077298Sobrien		{
96177298Sobrien		  ch = GET ();
96277298Sobrien		}
96377298Sobrien	      while (ch != EOF && !IS_NEWLINE (ch));
96477298Sobrien	      if (ch == EOF)
96577298Sobrien		as_warn ("end of file in comment; newline inserted");
96677298Sobrien	      state = 0;
96777298Sobrien	      PUT ('\n');
96877298Sobrien	      break;
96977298Sobrien	    }
97077298Sobrien#endif
97133965Sjdp	  else
97233965Sjdp	    {
97333965Sjdp	      if (ch2 != EOF)
97433965Sjdp		UNGET (ch2);
97533965Sjdp	      if (state == 9 || state == 10)
97633965Sjdp		state = 3;
97733965Sjdp	      PUT (ch);
97833965Sjdp	    }
97933965Sjdp	  break;
98033965Sjdp
98133965Sjdp	case LEX_IS_STRINGQUOTE:
982218822Sdim	  quotechar = ch;
98333965Sjdp	  if (state == 10)
98433965Sjdp	    {
985130561Sobrien	      /* Preserve the whitespace in foo "bar".  */
98633965Sjdp	      UNGET (ch);
98733965Sjdp	      state = 3;
98833965Sjdp	      PUT (' ');
98933965Sjdp
99033965Sjdp	      /* PUT didn't jump out.  We could just break, but we
991130561Sobrien		 know what will happen, so optimize a bit.  */
99233965Sjdp	      ch = GET ();
99333965Sjdp	      old_state = 3;
99433965Sjdp	    }
99533965Sjdp	  else if (state == 9)
99633965Sjdp	    old_state = 3;
99733965Sjdp	  else
99833965Sjdp	    old_state = state;
99933965Sjdp	  state = 5;
100033965Sjdp	  PUT (ch);
100133965Sjdp	  break;
100233965Sjdp
100333965Sjdp#ifndef IEEE_STYLE
100433965Sjdp	case LEX_IS_ONECHAR_QUOTE:
100533965Sjdp	  if (state == 10)
100633965Sjdp	    {
1007130561Sobrien	      /* Preserve the whitespace in foo 'b'.  */
100833965Sjdp	      UNGET (ch);
100933965Sjdp	      state = 3;
101033965Sjdp	      PUT (' ');
101133965Sjdp	      break;
101233965Sjdp	    }
101333965Sjdp	  ch = GET ();
101433965Sjdp	  if (ch == EOF)
101533965Sjdp	    {
101660484Sobrien	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
101733965Sjdp	      ch = 0;
101833965Sjdp	    }
101933965Sjdp	  if (ch == '\\')
102033965Sjdp	    {
102133965Sjdp	      ch = GET ();
102233965Sjdp	      if (ch == EOF)
102333965Sjdp		{
102460484Sobrien		  as_warn (_("end of file in escape character"));
102533965Sjdp		  ch = '\\';
102633965Sjdp		}
102733965Sjdp	      else
102833965Sjdp		ch = process_escape (ch);
102933965Sjdp	    }
103033965Sjdp	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
103133965Sjdp
103233965Sjdp	  /* None of these 'x constants for us.  We want 'x'.  */
103333965Sjdp	  if ((ch = GET ()) != '\'')
103433965Sjdp	    {
103533965Sjdp#ifdef REQUIRE_CHAR_CLOSE_QUOTE
103689857Sobrien	      as_warn (_("missing close quote; (assumed)"));
103733965Sjdp#else
103833965Sjdp	      if (ch != EOF)
103933965Sjdp		UNGET (ch);
104033965Sjdp#endif
104133965Sjdp	    }
104233965Sjdp	  if (strlen (out_buf) == 1)
104333965Sjdp	    {
104433965Sjdp	      PUT (out_buf[0]);
104533965Sjdp	      break;
104633965Sjdp	    }
104733965Sjdp	  if (state == 9)
104833965Sjdp	    old_state = 3;
104933965Sjdp	  else
105033965Sjdp	    old_state = state;
105133965Sjdp	  state = -1;
105233965Sjdp	  out_string = out_buf;
105333965Sjdp	  PUT (*out_string++);
105433965Sjdp	  break;
105533965Sjdp#endif
105633965Sjdp
105733965Sjdp	case LEX_IS_COLON:
105860484Sobrien#ifdef KEEP_WHITE_AROUND_COLON
105977298Sobrien	  state = 9;
106060484Sobrien#else
106133965Sjdp	  if (state == 9 || state == 10)
106233965Sjdp	    state = 3;
106333965Sjdp	  else if (state != 3)
106433965Sjdp	    state = 1;
106560484Sobrien#endif
106633965Sjdp	  PUT (ch);
106733965Sjdp	  break;
106833965Sjdp
106933965Sjdp	case LEX_IS_NEWLINE:
107033965Sjdp	  /* Roll out a bunch of newlines from inside comments, etc.  */
107133965Sjdp	  if (add_newlines)
107233965Sjdp	    {
107333965Sjdp	      --add_newlines;
107433965Sjdp	      UNGET (ch);
107533965Sjdp	    }
107677298Sobrien	  /* Fall through.  */
107733965Sjdp
107833965Sjdp	case LEX_IS_LINE_SEPARATOR:
107933965Sjdp	  state = 0;
108033965Sjdp	  PUT (ch);
108133965Sjdp	  break;
108233965Sjdp
108389857Sobrien	case LEX_IS_PARALLEL_SEPARATOR:
108489857Sobrien	  state = 1;
108589857Sobrien	  PUT (ch);
108689857Sobrien	  break;
108789857Sobrien
108838889Sjdp#ifdef TC_V850
108938889Sjdp	case LEX_IS_DOUBLEDASH_1ST:
109077298Sobrien	  ch2 = GET ();
109138889Sjdp	  if (ch2 != '-')
109238889Sjdp	    {
109338889Sjdp	      UNGET (ch2);
109438889Sjdp	      goto de_fault;
109538889Sjdp	    }
109677298Sobrien	  /* Read and skip to end of line.  */
109738889Sjdp	  do
109838889Sjdp	    {
109938889Sjdp	      ch = GET ();
110038889Sjdp	    }
110138889Sjdp	  while (ch != EOF && ch != '\n');
1102130561Sobrien
110338889Sjdp	  if (ch == EOF)
1104130561Sobrien	    as_warn (_("end of file in comment; newline inserted"));
1105130561Sobrien
110638889Sjdp	  state = 0;
110738889Sjdp	  PUT ('\n');
110838889Sjdp	  break;
110977298Sobrien#endif
111060484Sobrien#ifdef DOUBLEBAR_PARALLEL
111138889Sjdp	case LEX_IS_DOUBLEBAR_1ST:
111277298Sobrien	  ch2 = GET ();
1113130561Sobrien	  UNGET (ch2);
111438889Sjdp	  if (ch2 != '|')
1115130561Sobrien	    goto de_fault;
1116130561Sobrien
1117130561Sobrien	  /* Handle '||' in two states as invoking PUT twice might
1118130561Sobrien	     result in the first one jumping out of this loop.  We'd
1119130561Sobrien	     then lose track of the state and one '|' char.  */
1120130561Sobrien	  state = 13;
112138889Sjdp	  PUT ('|');
112238889Sjdp	  break;
112377298Sobrien#endif
112433965Sjdp	case LEX_IS_LINE_COMMENT_START:
112533965Sjdp	  /* FIXME-someday: The two character comment stuff was badly
112633965Sjdp	     thought out.  On i386, we want '/' as line comment start
112733965Sjdp	     AND we want C style comments.  hence this hack.  The
112833965Sjdp	     whole lexical process should be reworked.  xoxorich.  */
112933965Sjdp	  if (ch == '/')
113033965Sjdp	    {
113133965Sjdp	      ch2 = GET ();
113233965Sjdp	      if (ch2 == '*')
113333965Sjdp		{
113433965Sjdp		  old_state = 3;
113533965Sjdp		  state = -2;
113633965Sjdp		  break;
113733965Sjdp		}
113833965Sjdp	      else
113933965Sjdp		{
114033965Sjdp		  UNGET (ch2);
114133965Sjdp		}
1142130561Sobrien	    }
114333965Sjdp
114433965Sjdp	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
114533965Sjdp	    {
114633965Sjdp	      int startch;
114733965Sjdp
114833965Sjdp	      startch = ch;
114933965Sjdp
115033965Sjdp	      do
115133965Sjdp		{
115233965Sjdp		  ch = GET ();
115333965Sjdp		}
115433965Sjdp	      while (ch != EOF && IS_WHITESPACE (ch));
1155130561Sobrien
115633965Sjdp	      if (ch == EOF)
115733965Sjdp		{
115860484Sobrien		  as_warn (_("end of file in comment; newline inserted"));
115933965Sjdp		  PUT ('\n');
116033965Sjdp		  break;
116133965Sjdp		}
1162130561Sobrien
116333965Sjdp	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
116433965Sjdp		{
116533965Sjdp		  /* Not a cpp line.  */
116633965Sjdp		  while (ch != EOF && !IS_NEWLINE (ch))
116733965Sjdp		    ch = GET ();
116833965Sjdp		  if (ch == EOF)
116989857Sobrien		    as_warn (_("end of file in comment; newline inserted"));
117033965Sjdp		  state = 0;
117133965Sjdp		  PUT ('\n');
117233965Sjdp		  break;
117333965Sjdp		}
117477298Sobrien	      /* Looks like `# 123 "filename"' from cpp.  */
117533965Sjdp	      UNGET (ch);
117633965Sjdp	      old_state = 4;
117733965Sjdp	      state = -1;
117833965Sjdp	      if (scrub_m68k_mri)
1179218822Sdim		out_string = "\tlinefile ";
118033965Sjdp	      else
1181218822Sdim		out_string = "\t.linefile ";
118233965Sjdp	      PUT (*out_string++);
118333965Sjdp	      break;
118433965Sjdp	    }
118533965Sjdp
118638889Sjdp#ifdef TC_D10V
118738889Sjdp	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
118838889Sjdp	     Trap is the only short insn that has a first operand that is
118938889Sjdp	     neither register nor label.
119038889Sjdp	     We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
119177298Sobrien	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
119277298Sobrien	     already LEX_IS_LINE_COMMENT_START.  However, it is the
119377298Sobrien	     only character in line_comment_chars for d10v, hence we
119477298Sobrien	     can recognize it as such.  */
119538889Sjdp	  /* An alternative approach would be to reset the state to 1 when
119638889Sjdp	     we see '||', '<-' or '->', but that seems to be overkill.  */
119777298Sobrien	  if (state == 10)
119877298Sobrien	    PUT (' ');
119938889Sjdp#endif
120033965Sjdp	  /* We have a line comment character which is not at the
120133965Sjdp	     start of a line.  If this is also a normal comment
120233965Sjdp	     character, fall through.  Otherwise treat it as a default
120333965Sjdp	     character.  */
120433965Sjdp	  if (strchr (tc_comment_chars, ch) == NULL
120533965Sjdp	      && (! scrub_m68k_mri
120633965Sjdp		  || (ch != '!' && ch != '*')))
120733965Sjdp	    goto de_fault;
120833965Sjdp	  if (scrub_m68k_mri
120933965Sjdp	      && (ch == '!' || ch == '*' || ch == '#')
121033965Sjdp	      && state != 1
121133965Sjdp	      && state != 10)
121233965Sjdp	    goto de_fault;
121333965Sjdp	  /* Fall through.  */
121433965Sjdp	case LEX_IS_COMMENT_START:
121560484Sobrien#if defined TC_ARM && defined OBJ_ELF
121660484Sobrien	  /* On the ARM, `@' is the comment character.
121760484Sobrien	     Unfortunately this is also a special character in ELF .symver
121877298Sobrien	     directives (and .type, though we deal with those another way).
121977298Sobrien	     So we check if this line is such a directive, and treat
122077298Sobrien	     the character as default if so.  This is a hack.  */
122160484Sobrien	  if ((symver_state != NULL) && (*symver_state == 0))
122260484Sobrien	    goto de_fault;
122360484Sobrien#endif
1224218822Sdim
1225218822Sdim#ifdef TC_ARM
1226218822Sdim	  /* For the ARM, care is needed not to damage occurrences of \@
1227218822Sdim	     by stripping the @ onwards.  Yuck.  */
1228218822Sdim	  if (to > tostart && *(to - 1) == '\\')
1229218822Sdim	    /* Do not treat the @ as a start-of-comment.  */
1230218822Sdim	    goto de_fault;
1231218822Sdim#endif
1232218822Sdim
123377298Sobrien#ifdef WARN_COMMENTS
123477298Sobrien	  if (!found_comment)
123577298Sobrien	    as_where (&found_comment_file, &found_comment);
123677298Sobrien#endif
123733965Sjdp	  do
123833965Sjdp	    {
123933965Sjdp	      ch = GET ();
124033965Sjdp	    }
124133965Sjdp	  while (ch != EOF && !IS_NEWLINE (ch));
124233965Sjdp	  if (ch == EOF)
124360484Sobrien	    as_warn (_("end of file in comment; newline inserted"));
124433965Sjdp	  state = 0;
124533965Sjdp	  PUT ('\n');
124633965Sjdp	  break;
124733965Sjdp
124833965Sjdp	case LEX_IS_SYMBOL_COMPONENT:
124933965Sjdp	  if (state == 10)
125033965Sjdp	    {
125133965Sjdp	      /* This is a symbol character following another symbol
125233965Sjdp		 character, with whitespace in between.  We skipped
125333965Sjdp		 the whitespace earlier, so output it now.  */
125433965Sjdp	      UNGET (ch);
125533965Sjdp	      state = 3;
125633965Sjdp	      PUT (' ');
125733965Sjdp	      break;
125833965Sjdp	    }
125933965Sjdp
1260218822Sdim#ifdef TC_Z80
1261218822Sdim	  /* "af'" is a symbol containing '\''.  */
1262218822Sdim	  if (state == 3 && (ch == 'a' || ch == 'A'))
1263218822Sdim	    {
1264218822Sdim	      state = 16;
1265218822Sdim	      PUT (ch);
1266218822Sdim	      ch = GET ();
1267218822Sdim	      if (ch == 'f' || ch == 'F')
1268218822Sdim		{
1269218822Sdim		  state = 17;
1270218822Sdim		  PUT (ch);
1271218822Sdim		  break;
1272218822Sdim		}
1273218822Sdim	      else
1274218822Sdim		{
1275218822Sdim		  state = 9;
1276218822Sdim		  if (!IS_SYMBOL_COMPONENT (ch))
1277218822Sdim		    {
1278218822Sdim		      UNGET (ch);
1279218822Sdim		      break;
1280218822Sdim		    }
1281218822Sdim		}
1282218822Sdim	    }
1283218822Sdim#endif
128433965Sjdp	  if (state == 3)
128533965Sjdp	    state = 9;
128633965Sjdp
128733965Sjdp	  /* This is a common case.  Quickly copy CH and all the
1288130561Sobrien	     following symbol component or normal characters.  */
128960484Sobrien	  if (to + 1 < toend
129060484Sobrien	      && mri_state == NULL
129160484Sobrien#if defined TC_ARM && defined OBJ_ELF
129260484Sobrien	      && symver_state == NULL
129360484Sobrien#endif
129460484Sobrien	      )
129533965Sjdp	    {
129633965Sjdp	      char *s;
129733965Sjdp	      int len;
129833965Sjdp
129933965Sjdp	      for (s = from; s < fromend; s++)
130033965Sjdp		{
130133965Sjdp		  int type;
130233965Sjdp
130377298Sobrien		  ch2 = *(unsigned char *) s;
130433965Sjdp		  type = lex[ch2];
130533965Sjdp		  if (type != 0
130633965Sjdp		      && type != LEX_IS_SYMBOL_COMPONENT)
130733965Sjdp		    break;
130833965Sjdp		}
1309130561Sobrien
131033965Sjdp	      if (s > from)
1311130561Sobrien		/* Handle the last character normally, for
1312130561Sobrien		   simplicity.  */
1313130561Sobrien		--s;
1314130561Sobrien
131533965Sjdp	      len = s - from;
1316130561Sobrien
131733965Sjdp	      if (len > (toend - to) - 1)
131833965Sjdp		len = (toend - to) - 1;
1319130561Sobrien
132033965Sjdp	      if (len > 0)
132133965Sjdp		{
132233965Sjdp		  PUT (ch);
1323218822Sdim		  memcpy (to, from, len);
1324218822Sdim		  to += len;
1325218822Sdim		  from += len;
1326218822Sdim		  if (to >= toend)
1327218822Sdim		    goto tofull;
132833965Sjdp		  ch = GET ();
132933965Sjdp		}
133033965Sjdp	    }
133133965Sjdp
133233965Sjdp	  /* Fall through.  */
133333965Sjdp	default:
133433965Sjdp	de_fault:
133533965Sjdp	  /* Some relatively `normal' character.  */
133633965Sjdp	  if (state == 0)
133733965Sjdp	    {
1338130561Sobrien	      state = 11;	/* Now seeing label definition.  */
133933965Sjdp	    }
134033965Sjdp	  else if (state == 1)
134133965Sjdp	    {
1342130561Sobrien	      state = 2;	/* Ditto.  */
134333965Sjdp	    }
134433965Sjdp	  else if (state == 9)
134533965Sjdp	    {
1346130561Sobrien	      if (!IS_SYMBOL_COMPONENT (ch))
134733965Sjdp		state = 3;
134833965Sjdp	    }
134933965Sjdp	  else if (state == 10)
135033965Sjdp	    {
135160484Sobrien	      if (ch == '\\')
135260484Sobrien		{
135360484Sobrien		  /* Special handling for backslash: a backslash may
135460484Sobrien		     be the beginning of a formal parameter (of a
135560484Sobrien		     macro) following another symbol character, with
135660484Sobrien		     whitespace in between.  If that is the case, we
135760484Sobrien		     output a space before the parameter.  Strictly
135860484Sobrien		     speaking, correct handling depends upon what the
135960484Sobrien		     macro parameter expands into; if the parameter
136060484Sobrien		     expands into something which does not start with
136160484Sobrien		     an operand character, then we don't want to keep
136260484Sobrien		     the space.  We don't have enough information to
136360484Sobrien		     make the right choice, so here we are making the
136460484Sobrien		     choice which is more likely to be correct.  */
1365218822Sdim		  if (to + 1 >= toend)
1366218822Sdim		    {
1367218822Sdim		      /* If we're near the end of the buffer, save the
1368218822Sdim		         character for the next time round.  Otherwise
1369218822Sdim		         we'll lose our state.  */
1370218822Sdim		      UNGET (ch);
1371218822Sdim		      goto tofull;
1372218822Sdim		    }
1373218822Sdim		  *to++ = ' ';
137460484Sobrien		}
137560484Sobrien
137633965Sjdp	      state = 3;
137733965Sjdp	    }
137833965Sjdp	  PUT (ch);
137933965Sjdp	  break;
138033965Sjdp	}
138133965Sjdp    }
138233965Sjdp
138333965Sjdp  /*NOTREACHED*/
138433965Sjdp
138533965Sjdp fromeof:
138633965Sjdp  /* We have reached the end of the input.  */
138733965Sjdp  return to - tostart;
138833965Sjdp
138933965Sjdp tofull:
139033965Sjdp  /* The output buffer is full.  Save any input we have not yet
139133965Sjdp     processed.  */
139233965Sjdp  if (fromend > from)
139333965Sjdp    {
139460484Sobrien      saved_input = from;
139533965Sjdp      saved_input_len = fromend - from;
139633965Sjdp    }
139733965Sjdp  else
139860484Sobrien    saved_input = NULL;
139960484Sobrien
140033965Sjdp  return to - tostart;
140133965Sjdp}
140233965Sjdp
1403