app.c revision 1.1.1.3
1/* This is the Assembler Pre-Processor
2   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3   1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2010, 2012
4   Free Software Foundation, Inc.
5
6   This file is part of GAS, the GNU Assembler.
7
8   GAS is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 3, or (at your option)
11   any later version.
12
13   GAS is distributed in the hope that it will be useful, but WITHOUT
14   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16   License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with GAS; see the file COPYING.  If not, write to the Free
20   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
21   02110-1301, USA.  */
22
23/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
24/* App, the assembler pre-processor.  This pre-processor strips out
25   excess spaces, turns single-quoted characters into a decimal
26   constant, and turns the # in # <number> <filename> <garbage> into a
27   .linefile.  This needs better error-handling.  */
28
29#include "as.h"
30
31#if (__STDC__ != 1)
32#ifndef const
33#define const  /* empty */
34#endif
35#endif
36
#ifdef H_TICK_HEX
/* When non-zero, do_scrub_begin gives 'h' and 'H' the LEX_IS_H class
   instead of treating them as ordinary symbol characters.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  Set by do_scrub_begin from its
   argument.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  The recognizer there also accepts '1'
   where this template has '0', so one template covers both ".mri 0"
   and ".mri 1".  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Targets other than m68k never scrub in MRI mode, so the name simply
   expands to the constant 0.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";
/* Points at the next character of symver_pseudo that must be matched,
   or is NULL while no match is in progress.  */
static const char * symver_state;
#endif
60
/* Lexical class of every possible input byte, indexed by the byte's
   value as an unsigned char.  Entries hold one of the LEX_IS_* codes
   below; 0 means the character has no special class.  The table is
   filled in by do_scrub_begin.  */
static char lex[256];
/* Characters that do_scrub_begin always marks as symbol components
   (before comment, separator and target-specific overrides).  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Class codes stored in lex[].  Code 7 is not used.  Some codes only
   exist for particular targets or configurations.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14
#ifdef H_TICK_HEX
#define LEX_IS_H			15
#endif
/* Class tests.  The argument is used directly as an index into lex[],
   so it must be a valid index (0..255); in particular it must not be
   EOF.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

/* Maps the character following a backslash in a one-character
   constant to the character it denotes; defined further down.  */
static int process_escape (int);
97
98/* FIXME-soon: The entire lexer/parser thingy should be
99   built statically at compile time rather than dynamically
100   each and every time the assembler is run.  xoxorich.  */
101
102void
103do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
104{
105  const char *p;
106  int c;
107
108  lex[' '] = LEX_IS_WHITESPACE;
109  lex['\t'] = LEX_IS_WHITESPACE;
110  lex['\r'] = LEX_IS_WHITESPACE;
111  lex['\n'] = LEX_IS_NEWLINE;
112  lex[':'] = LEX_IS_COLON;
113
114#ifdef TC_M68K
115  scrub_m68k_mri = m68k_mri;
116
117  if (! m68k_mri)
118#endif
119    {
120      lex['"'] = LEX_IS_STRINGQUOTE;
121
122#if ! defined (TC_HPPA) && ! defined (TC_I370)
123      /* I370 uses single-quotes to delimit integer, float constants.  */
124      lex['\''] = LEX_IS_ONECHAR_QUOTE;
125#endif
126
127#ifdef SINGLE_QUOTE_STRINGS
128      lex['\''] = LEX_IS_STRINGQUOTE;
129#endif
130    }
131
132  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133     in state 5 of do_scrub_chars must be changed.  */
134
135  /* Note that these override the previous defaults, e.g. if ';' is a
136     comment char, then it isn't a line separator.  */
137  for (p = symbol_chars; *p; ++p)
138    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139
140  for (c = 128; c < 256; ++c)
141    lex[c] = LEX_IS_SYMBOL_COMPONENT;
142
143#ifdef tc_symbol_chars
144  /* This macro permits the processor to specify all characters which
145     may appears in an operand.  This will prevent the scrubber from
146     discarding meaningful whitespace in certain cases.  The i386
147     backend uses this to support prefixes, which can confuse the
148     scrubber as to whether it is parsing operands or opcodes.  */
149  for (p = tc_symbol_chars; *p; ++p)
150    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151#endif
152
153  /* The m68k backend wants to be able to change comment_chars.  */
154#ifndef tc_comment_chars
155#define tc_comment_chars comment_chars
156#endif
157  for (p = tc_comment_chars; *p; p++)
158    lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159
160  for (p = line_comment_chars; *p; p++)
161    lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162
163  for (p = line_separator_chars; *p; p++)
164    lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
165
166#ifdef tc_parallel_separator_chars
167  /* This macro permits the processor to specify all characters which
168     separate parallel insns on the same line.  */
169  for (p = tc_parallel_separator_chars; *p; p++)
170    lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
171#endif
172
173  /* Only allow slash-star comments if slash is not in use.
174     FIXME: This isn't right.  We should always permit them.  */
175  if (lex['/'] == 0)
176    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
177
178#ifdef TC_M68K
179  if (m68k_mri)
180    {
181      lex['\''] = LEX_IS_STRINGQUOTE;
182      lex[';'] = LEX_IS_COMMENT_START;
183      lex['*'] = LEX_IS_LINE_COMMENT_START;
184      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
185	 then it can't be used in an expression.  */
186      lex['!'] = LEX_IS_LINE_COMMENT_START;
187    }
188#endif
189
190#ifdef TC_V850
191  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
192#endif
193#ifdef DOUBLEBAR_PARALLEL
194  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
195#endif
196#ifdef TC_D30V
197  /* Must do this is we want VLIW instruction with "->" or "<-".  */
198  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
199#endif
200
201#ifdef H_TICK_HEX
202  if (enable_h_tick_hex)
203    {
204      lex['h'] = LEX_IS_H;
205      lex['H'] = LEX_IS_H;
206    }
207#endif
208}
209
/* Saved state of the scrubber.  These persist between calls to
   do_scrub_chars and are what app_push / app_pop save and restore.  */

/* Current state of the do_scrub_chars state machine (see the state
   table at the top of that function).  */
static int state;
/* State to go back to once a temporary state (-1, -2 or 5) is done.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Holds the decimal text produced for a one-character constant.  */
static char out_buf[20];
/* Number of newlines swallowed inside comments or continued strings
   that still have to be emitted.  */
static int add_newlines;
/* If non-NULL, do_scrub_chars starts reading from here (for
   saved_input_len bytes) instead of fetching fresh input.  */
static char *saved_input;
static size_t saved_input_len;
/* Buffer handed to the input callback of do_scrub_chars.  */
static char input_buffer[32 * 1024];
/* Position reached in mri_pseudo while recognizing a ".mri" pseudo-op,
   or NULL when no match is in progress.  */
static const char *mri_state;
/* Most recent character matched against mri_pseudo; once the whole
   pseudo-op has matched this is its '0' or '1' argument.  */
static char mri_last_ch;
221
222/* Data structure for saving the state of app across #include's.  Note that
223   app is called asynchronously to the parsing of the .include's, so our
224   state at the time .include is interpreted is completely unrelated.
225   That's why we have to save it all.  */
226
/* Snapshot of the scrubber's file-level state, one field per saved
   variable.  Filled in by app_push and consumed by app_pop.  */
struct app_save
{
  int          state;
  int          old_state;
  char *       out_string;
  char         out_buf[sizeof (out_buf)];
  int          add_newlines;
  /* Heap copy of the pending input, or NULL if there was none.
     saved_input_len is only meaningful when saved_input is not
     NULL.  */
  char *       saved_input;
  size_t       saved_input_len;
#ifdef TC_M68K
  int          scrub_m68k_mri;
#endif
  const char * mri_state;
  char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
};
245
246char *
247app_push (void)
248{
249  register struct app_save *saved;
250
251  saved = (struct app_save *) xmalloc (sizeof (*saved));
252  saved->state = state;
253  saved->old_state = old_state;
254  saved->out_string = out_string;
255  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
256  saved->add_newlines = add_newlines;
257  if (saved_input == NULL)
258    saved->saved_input = NULL;
259  else
260    {
261      saved->saved_input = (char *) xmalloc (saved_input_len);
262      memcpy (saved->saved_input, saved_input, saved_input_len);
263      saved->saved_input_len = saved_input_len;
264    }
265#ifdef TC_M68K
266  saved->scrub_m68k_mri = scrub_m68k_mri;
267#endif
268  saved->mri_state = mri_state;
269  saved->mri_last_ch = mri_last_ch;
270#if defined TC_ARM && defined OBJ_ELF
271  saved->symver_state = symver_state;
272#endif
273
274  /* do_scrub_begin() is not useful, just wastes time.  */
275
276  state = 0;
277  saved_input = NULL;
278  add_newlines = 0;
279
280  return (char *) saved;
281}
282
283void
284app_pop (char *arg)
285{
286  register struct app_save *saved = (struct app_save *) arg;
287
288  /* There is no do_scrub_end ().  */
289  state = saved->state;
290  old_state = saved->old_state;
291  out_string = saved->out_string;
292  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
293  add_newlines = saved->add_newlines;
294  if (saved->saved_input == NULL)
295    saved_input = NULL;
296  else
297    {
298      gas_assert (saved->saved_input_len <= sizeof (input_buffer));
299      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
300      saved_input = input_buffer;
301      saved_input_len = saved->saved_input_len;
302      free (saved->saved_input);
303    }
304#ifdef TC_M68K
305  scrub_m68k_mri = saved->scrub_m68k_mri;
306#endif
307  mri_state = saved->mri_state;
308  mri_last_ch = saved->mri_last_ch;
309#if defined TC_ARM && defined OBJ_ELF
310  symver_state = saved->symver_state;
311#endif
312
313  free (arg);
314}
315
316/* @@ This assumes that \n &c are the same on host and target.  This is not
317   necessarily true.  */
318
/* Translate CH, the character that followed a backslash, into the
   character the escape sequence stands for.  Only the escapes listed
   in the table are translated; any other value, including EOF, is
   returned unchanged.  */

static int
process_escape (int ch)
{
  static const struct
  {
    char letter;
    char value;
  }
  escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' },
    { '\'', '\'' },
    { '"', '\"' }
  };
  unsigned int i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (ch == escapes[i].letter)
      return escapes[i].value;

  return ch;
}
342
343/* This function is called to process input characters.  The GET
344   parameter is used to retrieve more input characters.  GET should
345   set its parameter to point to a buffer, and return the length of
346   the buffer; it should return 0 at end of file.  The scrubbed output
347   characters are put into the buffer starting at TOSTART; the TOSTART
348   buffer is TOLEN bytes in length.  The function returns the number
349   of scrubbed characters put into TOSTART.  This will be TOLEN unless
350   end of file was seen.  This function is arranged as a state
351   machine, and saves its state so that it may return at any point.
352   This is the way the old code used to work.  */
353
354size_t
355do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
356{
357  char *to = tostart;
358  char *toend = tostart + tolen;
359  char *from;
360  char *fromend;
361  size_t fromlen;
362  register int ch, ch2 = 0;
363  /* Character that started the string we're working on.  */
364  static char quotechar;
365
366  /*State 0: beginning of normal line
367	  1: After first whitespace on line (flush more white)
368	  2: After first non-white (opcode) on line (keep 1white)
369	  3: after second white on line (into operands) (flush white)
370	  4: after putting out a .linefile, put out digits
371	  5: parsing a string, then go to old-state
372	  6: putting out \ escape in a "d string.
373	  7: no longer used
374	  8: no longer used
375	  9: After seeing symbol char in state 3 (keep 1white after symchar)
376	 10: After seeing whitespace in state 9 (keep white before symchar)
377	 11: After seeing a symbol character in state 0 (eg a label definition)
378	 -1: output string in out_string and go to the state in old_state
379	 -2: flush text until a '*' '/' is seen, then go to state old_state
380#ifdef TC_V850
381	 12: After seeing a dash, looking for a second dash as a start
382	     of comment.
383#endif
384#ifdef DOUBLEBAR_PARALLEL
385	 13: After seeing a vertical bar, looking for a second
386	     vertical bar as a parallel expression separator.
387#endif
388#ifdef TC_PREDICATE_START_CHAR
389	 14: After seeing a predicate start character at state 0, looking
390	     for a predicate end character as predicate.
391	 15: After seeing a predicate start character at state 1, looking
392	     for a predicate end character as predicate.
393#endif
394#ifdef TC_Z80
395	 16: After seeing an 'a' or an 'A' at the start of a symbol
396	 17: After seeing an 'f' or an 'F' in state 16
397#endif
398	  */
399
400  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
401     constructs like ``.loc 1 20''.  This was turning into ``.loc
402     120''.  States 9 and 10 ensure that a space is never dropped in
403     between characters which could appear in an identifier.  Ian
404     Taylor, ian@cygnus.com.
405
406     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
407     correctly on the PA (and any other target where colons are optional).
408     Jeff Law, law@cs.utah.edu.
409
410     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
411     get squashed into "cmp r1,r2||trap#1", with the all important space
412     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
413
414  /* This macro gets the next input character.  */
415
416#define GET()							\
417  (from < fromend						\
418   ? * (unsigned char *) (from++)				\
419   : (saved_input = NULL,					\
420      fromlen = (*get) (input_buffer, sizeof input_buffer),	\
421      from = input_buffer,					\
422      fromend = from + fromlen,					\
423      (fromlen == 0						\
424       ? EOF							\
425       : * (unsigned char *) (from++))))
426
427  /* This macro pushes a character back on the input stream.  */
428
429#define UNGET(uch) (*--from = (uch))
430
431  /* This macro puts a character into the output buffer.  If this
432     character fills the output buffer, this macro jumps to the label
433     TOFULL.  We use this rather ugly approach because we need to
434     handle two different termination conditions: EOF on the input
435     stream, and a full output buffer.  It would be simpler if we
436     always read in the entire input stream before processing it, but
437     I don't want to make such a significant change to the assembler's
438     memory usage.  */
439
440#define PUT(pch)				\
441  do						\
442    {						\
443      *to++ = (pch);				\
444      if (to >= toend)				\
445	goto tofull;				\
446    }						\
447  while (0)
448
449  if (saved_input != NULL)
450    {
451      from = saved_input;
452      fromend = from + saved_input_len;
453    }
454  else
455    {
456      fromlen = (*get) (input_buffer, sizeof input_buffer);
457      if (fromlen == 0)
458	return 0;
459      from = input_buffer;
460      fromend = from + fromlen;
461    }
462
463  while (1)
464    {
465      /* The cases in this switch end with continue, in order to
466	 branch back to the top of this while loop and generate the
467	 next output character in the appropriate state.  */
468      switch (state)
469	{
470	case -1:
471	  ch = *out_string++;
472	  if (*out_string == '\0')
473	    {
474	      state = old_state;
475	      old_state = 3;
476	    }
477	  PUT (ch);
478	  continue;
479
480	case -2:
481	  for (;;)
482	    {
483	      do
484		{
485		  ch = GET ();
486
487		  if (ch == EOF)
488		    {
489		      as_warn (_("end of file in comment"));
490		      goto fromeof;
491		    }
492
493		  if (ch == '\n')
494		    PUT ('\n');
495		}
496	      while (ch != '*');
497
498	      while ((ch = GET ()) == '*')
499		;
500
501	      if (ch == EOF)
502		{
503		  as_warn (_("end of file in comment"));
504		  goto fromeof;
505		}
506
507	      if (ch == '/')
508		break;
509
510	      UNGET (ch);
511	    }
512
513	  state = old_state;
514	  UNGET (' ');
515	  continue;
516
517	case 4:
518	  ch = GET ();
519	  if (ch == EOF)
520	    goto fromeof;
521	  else if (ch >= '0' && ch <= '9')
522	    PUT (ch);
523	  else
524	    {
525	      while (ch != EOF && IS_WHITESPACE (ch))
526		ch = GET ();
527	      if (ch == '"')
528		{
529		  quotechar = ch;
530		  state = 5;
531		  old_state = 3;
532		  PUT (ch);
533		}
534	      else
535		{
536		  while (ch != EOF && ch != '\n')
537		    ch = GET ();
538		  state = 0;
539		  PUT (ch);
540		}
541	    }
542	  continue;
543
544	case 5:
545	  /* We are going to copy everything up to a quote character,
546	     with special handling for a backslash.  We try to
547	     optimize the copying in the simple case without using the
548	     GET and PUT macros.  */
549	  {
550	    char *s;
551	    ptrdiff_t len;
552
553	    for (s = from; s < fromend; s++)
554	      {
555		ch = *s;
556		if (ch == '\\'
557		    || ch == quotechar
558		    || ch == '\n')
559		  break;
560	      }
561	    len = s - from;
562	    if (len > toend - to)
563	      len = toend - to;
564	    if (len > 0)
565	      {
566		memcpy (to, from, len);
567		to += len;
568		from += len;
569		if (to >= toend)
570		  goto tofull;
571	      }
572	  }
573
574	  ch = GET ();
575	  if (ch == EOF)
576	    {
577	      /* This buffer is here specifically so
578		 that the UNGET below will work.  */
579	      static char one_char_buf[1];
580
581	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
582	      state = old_state;
583	      from = fromend = one_char_buf + 1;
584	      fromlen = 1;
585	      UNGET ('\n');
586	      PUT (quotechar);
587	    }
588	  else if (ch == quotechar)
589	    {
590	      state = old_state;
591	      PUT (ch);
592	    }
593#ifndef NO_STRING_ESCAPES
594	  else if (ch == '\\')
595	    {
596	      state = 6;
597	      PUT (ch);
598	    }
599#endif
600	  else if (scrub_m68k_mri && ch == '\n')
601	    {
602	      /* Just quietly terminate the string.  This permits lines like
603		   bne	label	loop if we haven't reach end yet.  */
604	      state = old_state;
605	      UNGET (ch);
606	      PUT ('\'');
607	    }
608	  else
609	    {
610	      PUT (ch);
611	    }
612	  continue;
613
614	case 6:
615	  state = 5;
616	  ch = GET ();
617	  switch (ch)
618	    {
619	      /* Handle strings broken across lines, by turning '\n' into
620		 '\\' and 'n'.  */
621	    case '\n':
622	      UNGET ('n');
623	      add_newlines++;
624	      PUT ('\\');
625	      continue;
626
627	    case EOF:
628	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
629	      PUT (quotechar);
630	      continue;
631
632	    case '"':
633	    case '\\':
634	    case 'b':
635	    case 'f':
636	    case 'n':
637	    case 'r':
638	    case 't':
639	    case 'v':
640	    case 'x':
641	    case 'X':
642	    case '0':
643	    case '1':
644	    case '2':
645	    case '3':
646	    case '4':
647	    case '5':
648	    case '6':
649	    case '7':
650	      break;
651
652	    default:
653#ifdef ONLY_STANDARD_ESCAPES
654	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
655#endif
656	      break;
657	    }
658	  PUT (ch);
659	  continue;
660
661#ifdef DOUBLEBAR_PARALLEL
662	case 13:
663	  ch = GET ();
664	  if (ch != '|')
665	    abort ();
666
667	  /* Reset back to state 1 and pretend that we are parsing a
668	     line from just after the first white space.  */
669	  state = 1;
670	  PUT ('|');
671#ifdef TC_TIC6X
672	  /* "||^" is used for SPMASKed instructions.  */
673	  ch = GET ();
674	  if (ch == EOF)
675	    goto fromeof;
676	  else if (ch == '^')
677	    PUT ('^');
678	  else
679	    UNGET (ch);
680#endif
681	  continue;
682#endif
683#ifdef TC_Z80
684	case 16:
685	  /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
686	  ch = GET ();
687	  if (ch == 'f' || ch == 'F')
688	    {
689	      state = 17;
690	      PUT (ch);
691	    }
692	  else
693	    {
694	      state = 9;
695	      break;
696	    }
697	case 17:
698	  /* We have seen "af" at the start of a symbol,
699	     a ' here is a part of that symbol.  */
700	  ch = GET ();
701	  state = 9;
702	  if (ch == '\'')
703	    /* Change to avoid warning about unclosed string.  */
704	    PUT ('`');
705	  else if (ch != EOF)
706	    UNGET (ch);
707	  break;
708#endif
709	}
710
711      /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
712
713      /* flushchar: */
714      ch = GET ();
715
716#ifdef TC_PREDICATE_START_CHAR
717      if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
718	{
719	  state += 14;
720	  PUT (ch);
721	  continue;
722	}
723      else if (state == 14 || state == 15)
724	{
725	  if (ch == TC_PREDICATE_END_CHAR)
726	    {
727	      state -= 14;
728	      PUT (ch);
729	      ch = GET ();
730	    }
731	  else
732	    {
733	      PUT (ch);
734	      continue;
735	    }
736	}
737#endif
738
739    recycle:
740
741#if defined TC_ARM && defined OBJ_ELF
742      /* We need to watch out for .symver directives.  See the comment later
743	 in this function.  */
744      if (symver_state == NULL)
745	{
746	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
747	    symver_state = symver_pseudo + 1;
748	}
749      else
750	{
751	  /* We advance to the next state if we find the right
752	     character.  */
753	  if (ch != '\0' && (*symver_state == ch))
754	    ++symver_state;
755	  else if (*symver_state != '\0')
756	    /* We did not get the expected character, or we didn't
757	       get a valid terminating character after seeing the
758	       entire pseudo-op, so we must go back to the beginning.  */
759	    symver_state = NULL;
760	  else
761	    {
762	      /* We've read the entire pseudo-op.  If this is the end
763		 of the line, go back to the beginning.  */
764	      if (IS_NEWLINE (ch))
765		symver_state = NULL;
766	    }
767	}
768#endif /* TC_ARM && OBJ_ELF */
769
770#ifdef TC_M68K
771      /* We want to have pseudo-ops which control whether we are in
772	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
773	 the scrubber, that means that we need a special purpose
774	 recognizer here.  */
775      if (mri_state == NULL)
776	{
777	  if ((state == 0 || state == 1)
778	      && ch == mri_pseudo[0])
779	    mri_state = mri_pseudo + 1;
780	}
781      else
782	{
783	  /* We advance to the next state if we find the right
784	     character, or if we need a space character and we get any
785	     whitespace character, or if we need a '0' and we get a
786	     '1' (this is so that we only need one state to handle
787	     ``.mri 0'' and ``.mri 1'').  */
788	  if (ch != '\0'
789	      && (*mri_state == ch
790		  || (*mri_state == ' '
791		      && lex[ch] == LEX_IS_WHITESPACE)
792		  || (*mri_state == '0'
793		      && ch == '1')))
794	    {
795	      mri_last_ch = ch;
796	      ++mri_state;
797	    }
798	  else if (*mri_state != '\0'
799		   || (lex[ch] != LEX_IS_WHITESPACE
800		       && lex[ch] != LEX_IS_NEWLINE))
801	    {
802	      /* We did not get the expected character, or we didn't
803		 get a valid terminating character after seeing the
804		 entire pseudo-op, so we must go back to the
805		 beginning.  */
806	      mri_state = NULL;
807	    }
808	  else
809	    {
	      /* We've read the entire pseudo-op.  mri_last_ch is
811		 either '0' or '1' indicating whether to enter or
812		 leave MRI mode.  */
813	      do_scrub_begin (mri_last_ch == '1');
814	      mri_state = NULL;
815
816	      /* We continue handling the character as usual.  The
817		 main gas reader must also handle the .mri pseudo-op
818		 to control expression parsing and the like.  */
819	    }
820	}
821#endif
822
823      if (ch == EOF)
824	{
825	  if (state != 0)
826	    {
827	      as_warn (_("end of file not at end of a line; newline inserted"));
828	      state = 0;
829	      PUT ('\n');
830	    }
831	  goto fromeof;
832	}
833
834      switch (lex[ch])
835	{
836	case LEX_IS_WHITESPACE:
837	  do
838	    {
839	      ch = GET ();
840	    }
841	  while (ch != EOF && IS_WHITESPACE (ch));
842	  if (ch == EOF)
843	    goto fromeof;
844
845	  if (state == 0)
846	    {
847	      /* Preserve a single whitespace character at the
848		 beginning of a line.  */
849	      state = 1;
850	      UNGET (ch);
851	      PUT (' ');
852	      break;
853	    }
854
855#ifdef KEEP_WHITE_AROUND_COLON
856	  if (lex[ch] == LEX_IS_COLON)
857	    {
858	      /* Only keep this white if there's no white *after* the
859		 colon.  */
860	      ch2 = GET ();
861	      if (ch2 != EOF)
862		UNGET (ch2);
863	      if (!IS_WHITESPACE (ch2))
864		{
865		  state = 9;
866		  UNGET (ch);
867		  PUT (' ');
868		  break;
869		}
870	    }
871#endif
872	  if (IS_COMMENT (ch)
873	      || ch == '/'
874	      || IS_LINE_SEPARATOR (ch)
875	      || IS_PARALLEL_SEPARATOR (ch))
876	    {
877	      if (scrub_m68k_mri)
878		{
879		  /* In MRI mode, we keep these spaces.  */
880		  UNGET (ch);
881		  PUT (' ');
882		  break;
883		}
884	      goto recycle;
885	    }
886
887	  /* If we're in state 2 or 11, we've seen a non-white
888	     character followed by whitespace.  If the next character
889	     is ':', this is whitespace after a label name which we
890	     normally must ignore.  In MRI mode, though, spaces are
891	     not permitted between the label and the colon.  */
892	  if ((state == 2 || state == 11)
893	      && lex[ch] == LEX_IS_COLON
894	      && ! scrub_m68k_mri)
895	    {
896	      state = 1;
897	      PUT (ch);
898	      break;
899	    }
900
901	  switch (state)
902	    {
903	    case 1:
904	      /* We can arrive here if we leave a leading whitespace
905		 character at the beginning of a line.  */
906	      goto recycle;
907	    case 2:
908	      state = 3;
909	      if (to + 1 < toend)
910		{
911		  /* Optimize common case by skipping UNGET/GET.  */
912		  PUT (' ');	/* Sp after opco */
913		  goto recycle;
914		}
915	      UNGET (ch);
916	      PUT (' ');
917	      break;
918	    case 3:
919#ifndef TC_KEEP_OPERAND_SPACES
920	      /* For TI C6X, we keep these spaces as they may separate
921		 functional unit specifiers from operands.  */
922	      if (scrub_m68k_mri)
923#endif
924		{
925		  /* In MRI mode, we keep these spaces.  */
926		  UNGET (ch);
927		  PUT (' ');
928		  break;
929		}
930	      goto recycle;	/* Sp in operands */
931	    case 9:
932	    case 10:
933#ifndef TC_KEEP_OPERAND_SPACES
934	      if (scrub_m68k_mri)
935#endif
936		{
937		  /* In MRI mode, we keep these spaces.  */
938		  state = 3;
939		  UNGET (ch);
940		  PUT (' ');
941		  break;
942		}
943	      state = 10;	/* Sp after symbol char */
944	      goto recycle;
945	    case 11:
946	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
947		state = 1;
948	      else
949		{
950		  /* We know that ch is not ':', since we tested that
951		     case above.  Therefore this is not a label, so it
952		     must be the opcode, and we've just seen the
953		     whitespace after it.  */
954		  state = 3;
955		}
956	      UNGET (ch);
957	      PUT (' ');	/* Sp after label definition.  */
958	      break;
959	    default:
960	      BAD_CASE (state);
961	    }
962	  break;
963
964	case LEX_IS_TWOCHAR_COMMENT_1ST:
965	  ch2 = GET ();
966	  if (ch2 == '*')
967	    {
968	      for (;;)
969		{
970		  do
971		    {
972		      ch2 = GET ();
973		      if (ch2 != EOF && IS_NEWLINE (ch2))
974			add_newlines++;
975		    }
976		  while (ch2 != EOF && ch2 != '*');
977
978		  while (ch2 == '*')
979		    ch2 = GET ();
980
981		  if (ch2 == EOF || ch2 == '/')
982		    break;
983
984		  /* This UNGET will ensure that we count newlines
985		     correctly.  */
986		  UNGET (ch2);
987		}
988
989	      if (ch2 == EOF)
990		as_warn (_("end of file in multiline comment"));
991
992	      ch = ' ';
993	      goto recycle;
994	    }
995#ifdef DOUBLESLASH_LINE_COMMENTS
996	  else if (ch2 == '/')
997	    {
998	      do
999		{
1000		  ch = GET ();
1001		}
1002	      while (ch != EOF && !IS_NEWLINE (ch));
1003	      if (ch == EOF)
1004		as_warn ("end of file in comment; newline inserted");
1005	      state = 0;
1006	      PUT ('\n');
1007	      break;
1008	    }
1009#endif
1010	  else
1011	    {
1012	      if (ch2 != EOF)
1013		UNGET (ch2);
1014	      if (state == 9 || state == 10)
1015		state = 3;
1016	      PUT (ch);
1017	    }
1018	  break;
1019
1020	case LEX_IS_STRINGQUOTE:
1021	  quotechar = ch;
1022	  if (state == 10)
1023	    {
1024	      /* Preserve the whitespace in foo "bar".  */
1025	      UNGET (ch);
1026	      state = 3;
1027	      PUT (' ');
1028
1029	      /* PUT didn't jump out.  We could just break, but we
1030		 know what will happen, so optimize a bit.  */
1031	      ch = GET ();
1032	      old_state = 3;
1033	    }
1034	  else if (state == 9)
1035	    old_state = 3;
1036	  else
1037	    old_state = state;
1038	  state = 5;
1039	  PUT (ch);
1040	  break;
1041
1042#ifndef IEEE_STYLE
1043	case LEX_IS_ONECHAR_QUOTE:
1044#ifdef H_TICK_HEX
1045	  if (state == 9 && enable_h_tick_hex)
1046	    {
1047	      char c;
1048
1049	      c = GET ();
1050	      as_warn ("'%c found after symbol", c);
1051	      UNGET (c);
1052	    }
1053#endif
1054	  if (state == 10)
1055	    {
1056	      /* Preserve the whitespace in foo 'b'.  */
1057	      UNGET (ch);
1058	      state = 3;
1059	      PUT (' ');
1060	      break;
1061	    }
1062	  ch = GET ();
1063	  if (ch == EOF)
1064	    {
1065	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
1066	      ch = 0;
1067	    }
1068	  if (ch == '\\')
1069	    {
1070	      ch = GET ();
1071	      if (ch == EOF)
1072		{
1073		  as_warn (_("end of file in escape character"));
1074		  ch = '\\';
1075		}
1076	      else
1077		ch = process_escape (ch);
1078	    }
1079	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
1080
1081	  /* None of these 'x constants for us.  We want 'x'.  */
1082	  if ((ch = GET ()) != '\'')
1083	    {
1084#ifdef REQUIRE_CHAR_CLOSE_QUOTE
1085	      as_warn (_("missing close quote; (assumed)"));
1086#else
1087	      if (ch != EOF)
1088		UNGET (ch);
1089#endif
1090	    }
1091	  if (strlen (out_buf) == 1)
1092	    {
1093	      PUT (out_buf[0]);
1094	      break;
1095	    }
1096	  if (state == 9)
1097	    old_state = 3;
1098	  else
1099	    old_state = state;
1100	  state = -1;
1101	  out_string = out_buf;
1102	  PUT (*out_string++);
1103	  break;
1104#endif
1105
1106	case LEX_IS_COLON:
1107#ifdef KEEP_WHITE_AROUND_COLON
1108	  state = 9;
1109#else
1110	  if (state == 9 || state == 10)
1111	    state = 3;
1112	  else if (state != 3)
1113	    state = 1;
1114#endif
1115	  PUT (ch);
1116	  break;
1117
1118	case LEX_IS_NEWLINE:
1119	  /* Roll out a bunch of newlines from inside comments, etc.  */
1120	  if (add_newlines)
1121	    {
1122	      --add_newlines;
1123	      UNGET (ch);
1124	    }
1125	  /* Fall through.  */
1126
1127	case LEX_IS_LINE_SEPARATOR:
1128	  state = 0;
1129	  PUT (ch);
1130	  break;
1131
1132	case LEX_IS_PARALLEL_SEPARATOR:
1133	  state = 1;
1134	  PUT (ch);
1135	  break;
1136
1137#ifdef TC_V850
1138	case LEX_IS_DOUBLEDASH_1ST:
1139	  ch2 = GET ();
1140	  if (ch2 != '-')
1141	    {
1142	      if (ch2 != EOF)
1143		UNGET (ch2);
1144	      goto de_fault;
1145	    }
1146	  /* Read and skip to end of line.  */
1147	  do
1148	    {
1149	      ch = GET ();
1150	    }
1151	  while (ch != EOF && ch != '\n');
1152
1153	  if (ch == EOF)
1154	    as_warn (_("end of file in comment; newline inserted"));
1155
1156	  state = 0;
1157	  PUT ('\n');
1158	  break;
1159#endif
1160#ifdef DOUBLEBAR_PARALLEL
1161	case LEX_IS_DOUBLEBAR_1ST:
1162	  ch2 = GET ();
1163	  if (ch2 != EOF)
1164	    UNGET (ch2);
1165	  if (ch2 != '|')
1166	    goto de_fault;
1167
1168	  /* Handle '||' in two states as invoking PUT twice might
1169	     result in the first one jumping out of this loop.  We'd
1170	     then lose track of the state and one '|' char.  */
1171	  state = 13;
1172	  PUT ('|');
1173	  break;
1174#endif
1175	case LEX_IS_LINE_COMMENT_START:
1176	  /* FIXME-someday: The two character comment stuff was badly
1177	     thought out.  On i386, we want '/' as line comment start
1178	     AND we want C style comments.  hence this hack.  The
1179	     whole lexical process should be reworked.  xoxorich.  */
1180	  if (ch == '/')
1181	    {
1182	      ch2 = GET ();
1183	      if (ch2 == '*')
1184		{
1185		  old_state = 3;
1186		  state = -2;
1187		  break;
1188		}
1189	      else
1190		{
1191		  UNGET (ch2);
1192		}
1193	    }
1194
1195	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
1196	    {
1197	      int startch;
1198
1199	      startch = ch;
1200
1201	      do
1202		{
1203		  ch = GET ();
1204		}
1205	      while (ch != EOF && IS_WHITESPACE (ch));
1206
1207	      if (ch == EOF)
1208		{
1209		  as_warn (_("end of file in comment; newline inserted"));
1210		  PUT ('\n');
1211		  break;
1212		}
1213
1214	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1215		{
1216		  /* Not a cpp line.  */
1217		  while (ch != EOF && !IS_NEWLINE (ch))
1218		    ch = GET ();
1219		  if (ch == EOF)
1220		    as_warn (_("end of file in comment; newline inserted"));
1221		  state = 0;
1222		  PUT ('\n');
1223		  break;
1224		}
1225	      /* Looks like `# 123 "filename"' from cpp.  */
1226	      UNGET (ch);
1227	      old_state = 4;
1228	      state = -1;
1229	      if (scrub_m68k_mri)
1230		out_string = "\tlinefile ";
1231	      else
1232		out_string = "\t.linefile ";
1233	      PUT (*out_string++);
1234	      break;
1235	    }
1236
1237#ifdef TC_D10V
1238	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1239	     Trap is the only short insn that has a first operand that is
1240	     neither register nor label.
1241	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1242	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1243	     already LEX_IS_LINE_COMMENT_START.  However, it is the
1244	     only character in line_comment_chars for d10v, hence we
1245	     can recognize it as such.  */
1246	  /* An alternative approach would be to reset the state to 1 when
1247	     we see '||', '<'- or '->', but that seems to be overkill.  */
1248	  if (state == 10)
1249	    PUT (' ');
1250#endif
1251	  /* We have a line comment character which is not at the
1252	     start of a line.  If this is also a normal comment
1253	     character, fall through.  Otherwise treat it as a default
1254	     character.  */
1255	  if (strchr (tc_comment_chars, ch) == NULL
1256	      && (! scrub_m68k_mri
1257		  || (ch != '!' && ch != '*')))
1258	    goto de_fault;
1259	  if (scrub_m68k_mri
1260	      && (ch == '!' || ch == '*' || ch == '#')
1261	      && state != 1
1262	      && state != 10)
1263	    goto de_fault;
1264	  /* Fall through.  */
1265	case LEX_IS_COMMENT_START:
1266#if defined TC_ARM && defined OBJ_ELF
1267	  /* On the ARM, `@' is the comment character.
1268	     Unfortunately this is also a special character in ELF .symver
1269	     directives (and .type, though we deal with those another way).
1270	     So we check if this line is such a directive, and treat
1271	     the character as default if so.  This is a hack.  */
1272	  if ((symver_state != NULL) && (*symver_state == 0))
1273	    goto de_fault;
1274#endif
1275
1276#ifdef TC_ARM
1277	  /* For the ARM, care is needed not to damage occurrences of \@
1278	     by stripping the @ onwards.  Yuck.  */
1279	  if (to > tostart && *(to - 1) == '\\')
1280	    /* Do not treat the @ as a start-of-comment.  */
1281	    goto de_fault;
1282#endif
1283
1284#ifdef WARN_COMMENTS
1285	  if (!found_comment)
1286	    as_where (&found_comment_file, &found_comment);
1287#endif
1288	  do
1289	    {
1290	      ch = GET ();
1291	    }
1292	  while (ch != EOF && !IS_NEWLINE (ch));
1293	  if (ch == EOF)
1294	    as_warn (_("end of file in comment; newline inserted"));
1295	  state = 0;
1296	  PUT ('\n');
1297	  break;
1298
1299#ifdef H_TICK_HEX
1300	case LEX_IS_H:
1301	  /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1302	     the H' with 0x to make them gas-style hex characters.  */
1303	  if (enable_h_tick_hex)
1304	    {
1305	      char quot;
1306
1307	      quot = GET ();
1308	      if (quot == '\'')
1309		{
1310		  UNGET ('x');
1311		  ch = '0';
1312		}
1313	      else
1314		UNGET (quot);
1315	    }
1316	  /* FALL THROUGH */
1317#endif
1318
1319	case LEX_IS_SYMBOL_COMPONENT:
1320	  if (state == 10)
1321	    {
1322	      /* This is a symbol character following another symbol
1323		 character, with whitespace in between.  We skipped
1324		 the whitespace earlier, so output it now.  */
1325	      UNGET (ch);
1326	      state = 3;
1327	      PUT (' ');
1328	      break;
1329	    }
1330
1331#ifdef TC_Z80
1332	  /* "af'" is a symbol containing '\''.  */
1333	  if (state == 3 && (ch == 'a' || ch == 'A'))
1334	    {
1335	      state = 16;
1336	      PUT (ch);
1337	      ch = GET ();
1338	      if (ch == 'f' || ch == 'F')
1339		{
1340		  state = 17;
1341		  PUT (ch);
1342		  break;
1343		}
1344	      else
1345		{
1346		  state = 9;
1347		  if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1348		    {
1349		      if (ch != EOF)
1350			UNGET (ch);
1351		      break;
1352		    }
1353		}
1354	    }
1355#endif
1356	  if (state == 3)
1357	    state = 9;
1358
1359	  /* This is a common case.  Quickly copy CH and all the
1360	     following symbol component or normal characters.  */
1361	  if (to + 1 < toend
1362	      && mri_state == NULL
1363#if defined TC_ARM && defined OBJ_ELF
1364	      && symver_state == NULL
1365#endif
1366	      )
1367	    {
1368	      char *s;
1369	      ptrdiff_t len;
1370
1371	      for (s = from; s < fromend; s++)
1372		{
1373		  int type;
1374
1375		  ch2 = *(unsigned char *) s;
1376		  type = lex[ch2];
1377		  if (type != 0
1378		      && type != LEX_IS_SYMBOL_COMPONENT)
1379		    break;
1380		}
1381
1382	      if (s > from)
1383		/* Handle the last character normally, for
1384		   simplicity.  */
1385		--s;
1386
1387	      len = s - from;
1388
1389	      if (len > (toend - to) - 1)
1390		len = (toend - to) - 1;
1391
1392	      if (len > 0)
1393		{
1394		  PUT (ch);
1395		  memcpy (to, from, len);
1396		  to += len;
1397		  from += len;
1398		  if (to >= toend)
1399		    goto tofull;
1400		  ch = GET ();
1401		}
1402	    }
1403
1404	  /* Fall through.  */
1405	default:
1406	de_fault:
1407	  /* Some relatively `normal' character.  */
1408	  if (state == 0)
1409	    {
1410	      state = 11;	/* Now seeing label definition.  */
1411	    }
1412	  else if (state == 1)
1413	    {
1414	      state = 2;	/* Ditto.  */
1415	    }
1416	  else if (state == 9)
1417	    {
1418	      if (!IS_SYMBOL_COMPONENT (ch))
1419		state = 3;
1420	    }
1421	  else if (state == 10)
1422	    {
1423	      if (ch == '\\')
1424		{
1425		  /* Special handling for backslash: a backslash may
1426		     be the beginning of a formal parameter (of a
1427		     macro) following another symbol character, with
1428		     whitespace in between.  If that is the case, we
1429		     output a space before the parameter.  Strictly
1430		     speaking, correct handling depends upon what the
1431		     macro parameter expands into; if the parameter
1432		     expands into something which does not start with
1433		     an operand character, then we don't want to keep
1434		     the space.  We don't have enough information to
1435		     make the right choice, so here we are making the
1436		     choice which is more likely to be correct.  */
1437		  if (to + 1 >= toend)
1438		    {
1439		      /* If we're near the end of the buffer, save the
1440		         character for the next time round.  Otherwise
1441		         we'll lose our state.  */
1442		      UNGET (ch);
1443		      goto tofull;
1444		    }
1445		  *to++ = ' ';
1446		}
1447
1448	      state = 3;
1449	    }
1450	  PUT (ch);
1451	  break;
1452	}
1453    }
1454
1455  /*NOTREACHED*/
1456
1457 fromeof:
1458  /* We have reached the end of the input.  */
1459  return to - tostart;
1460
1461 tofull:
1462  /* The output buffer is full.  Save any input we have not yet
1463     processed.  */
1464  if (fromend > from)
1465    {
1466      saved_input = from;
1467      saved_input_len = fromend - from;
1468    }
1469  else
1470    saved_input = NULL;
1471
1472  return to - tostart;
1473}
1474