app.c revision 33965
168651Skris/* This is the Assembler Pre-Processor
268651Skris   Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 1997
368651Skris   Free Software Foundation, Inc.
468651Skris
572613Skris   This file is part of GAS, the GNU Assembler.
668651Skris
768651Skris   GAS is free software; you can redistribute it and/or modify
868651Skris   it under the terms of the GNU General Public License as published by
968651Skris   the Free Software Foundation; either version 2, or (at your option)
1068651Skris   any later version.
11160814Ssimon
12160814Ssimon   GAS is distributed in the hope that it will be useful,
13160814Ssimon   but WITHOUT ANY WARRANTY; without even the implied warranty of
14205128Ssimon   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1568651Skris   GNU General Public License for more details.
1668651Skris
1768651Skris   You should have received a copy of the GNU General Public License
1868651Skris   along with GAS; see the file COPYING.  If not, write to the Free
1968651Skris   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2068651Skris   02111-1307, USA.  */
2168651Skris
2268651Skris/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
2368651Skris/* App, the assembler pre-processor.  This pre-processor strips out excess
2468651Skris   spaces, turns single-quoted characters into a decimal constant, and turns
2568651Skris   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
26269682Sjkim   pair.  This needs better error-handling.  */
27269682Sjkim
28269682Sjkim#include <stdio.h>
29269682Sjkim#include "as.h"			/* For BAD_CASE() only */
30269682Sjkim
31269682Sjkim#if (__STDC__ != 1)
3268651Skris#ifndef const
3368651Skris#define const  /* empty */
3468651Skris#endif
35194206Ssimon#endif
3672613Skris
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";

/* Character class table, indexed by character code (0..255).  An entry
   is 0 for an ordinary character or one of the LEX_IS_* values below.
   It is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters which are classified as LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11

/* Classification predicates.  The argument is converted to unsigned
   char before it is used as an index, so that a plain (possibly
   signed) char with the high bit set still selects an entry inside
   the table.  Callers must still test for EOF themselves.  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
6672613Skris
6772613Skrisstatic int process_escape PARAMS ((int));
6872613Skris
6972613Skris/* FIXME-soon: The entire lexer/parser thingy should be
7072613Skris   built statically at compile time rather than dynamically
7172613Skris   each and every time the assembler is run.  xoxorich. */
7272613Skris
7376866Skrisvoid
7472613Skrisdo_scrub_begin (m68k_mri)
7572613Skris     int m68k_mri;
7672613Skris{
7772613Skris  const char *p;
7872613Skris
7972613Skris  scrub_m68k_mri = m68k_mri;
8072613Skris
8172613Skris  lex[' '] = LEX_IS_WHITESPACE;
8272613Skris  lex['\t'] = LEX_IS_WHITESPACE;
8372613Skris  lex['\n'] = LEX_IS_NEWLINE;
8472613Skris  lex[';'] = LEX_IS_LINE_SEPARATOR;
8572613Skris  lex[':'] = LEX_IS_COLON;
8672613Skris
8772613Skris  if (! m68k_mri)
8872613Skris    {
8972613Skris      lex['"'] = LEX_IS_STRINGQUOTE;
9072613Skris
9172613Skris#ifndef TC_HPPA
9272613Skris      lex['\''] = LEX_IS_ONECHAR_QUOTE;
9372613Skris#endif
9472613Skris
9572613Skris#ifdef SINGLE_QUOTE_STRINGS
9672613Skris      lex['\''] = LEX_IS_STRINGQUOTE;
9772613Skris#endif
9872613Skris    }
99269682Sjkim
100269682Sjkim  /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
101269682Sjkim     in state 5 of do_scrub_chars must be changed.  */
102269682Sjkim
10368651Skris  /* Note that these override the previous defaults, e.g. if ';' is a
10468651Skris     comment char, then it isn't a line separator.  */
10568651Skris  for (p = symbol_chars; *p; ++p)
10668651Skris    {
10768651Skris      lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
10872613Skris    }				/* declare symbol characters */
10972613Skris
11072613Skris  /* The m68k backend wants to be able to change comment_chars.  */
11172613Skris#ifndef tc_comment_chars
112290207Sjkim#define tc_comment_chars comment_chars
113290207Sjkim#endif
114290207Sjkim  for (p = tc_comment_chars; *p; p++)
115290207Sjkim    {
116290207Sjkim      lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
117290207Sjkim    }				/* declare comment chars */
118290207Sjkim
119290207Sjkim  for (p = line_comment_chars; *p; p++)
120290207Sjkim    {
121290207Sjkim      lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
12268651Skris    }				/* declare line comment chars */
12368651Skris
12468651Skris  for (p = line_separator_chars; *p; p++)
12568651Skris    {
12668651Skris      lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
12768651Skris    }				/* declare line separators */
12868651Skris
129290207Sjkim  /* Only allow slash-star comments if slash is not in use.
130290207Sjkim     FIXME: This isn't right.  We should always permit them.  */
13168651Skris  if (lex['/'] == 0)
13268651Skris    {
133      lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
134    }
135
136  if (m68k_mri)
137    {
138      lex['\''] = LEX_IS_STRINGQUOTE;
139      lex[';'] = LEX_IS_COMMENT_START;
140      lex['*'] = LEX_IS_LINE_COMMENT_START;
141      /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
142         then it can't be used in an expression.  */
143      lex['!'] = LEX_IS_LINE_COMMENT_START;
144    }
145}				/* do_scrub_begin() */
146
/* The scrubber's state.  It is kept in file-scope variables so that
   do_scrub_chars can return at any point and pick up where it left
   off on the next call.  */

/* Current state of the state machine in do_scrub_chars.  */
static int state;

/* State to go to once the current temporary state is finished.  */
static int old_state;

/* Next character to emit while in state -1.  */
static char *out_string;

/* Holds the decimal text generated for a one-character quote.  */
static char out_buf[20];

/* Number of newlines still owed to the output.  */
static int add_newlines;

/* Input which had been fetched but not yet scrubbed when the output
   buffer filled up, and the number of bytes in it.  */
static char *saved_input;
static int saved_input_len;

/* Progress of the ".mri" recognizer, and the last character it
   matched.  */
static const char *mri_state;
static char mri_last_ch;

/* A snapshot of all of the above, plus scrub_m68k_mri.  app is called
   asynchronously to the parsing of the .include's, so the scrubber
   state at the time a .include is interpreted has nothing to do with
   the included file; app_push stores the state in one of these and
   app_pop puts it back.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];	/* same size as the live buffer */
  int add_newlines;
  char *saved_input;
  int saved_input_len;
  int scrub_m68k_mri;
  const char *mri_state;
  char mri_last_ch;
};
176
177char *
178app_push ()
179{
180  register struct app_save *saved;
181
182  saved = (struct app_save *) xmalloc (sizeof (*saved));
183  saved->state = state;
184  saved->old_state = old_state;
185  saved->out_string = out_string;
186  memcpy (saved->out_buf, out_buf, sizeof (out_buf));
187  saved->add_newlines = add_newlines;
188  saved->saved_input = saved_input;
189  saved->saved_input_len = saved_input_len;
190  saved->scrub_m68k_mri = scrub_m68k_mri;
191  saved->mri_state = mri_state;
192  saved->mri_last_ch = mri_last_ch;
193
194  /* do_scrub_begin() is not useful, just wastes time. */
195
196  state = 0;
197  saved_input = NULL;
198
199  return (char *) saved;
200}
201
202void
203app_pop (arg)
204     char *arg;
205{
206  register struct app_save *saved = (struct app_save *) arg;
207
208  /* There is no do_scrub_end (). */
209  state = saved->state;
210  old_state = saved->old_state;
211  out_string = saved->out_string;
212  memcpy (out_buf, saved->out_buf, sizeof (out_buf));
213  add_newlines = saved->add_newlines;
214  saved_input = saved->saved_input;
215  saved_input_len = saved->saved_input_len;
216  scrub_m68k_mri = saved->scrub_m68k_mri;
217  mri_state = saved->mri_state;
218  mri_last_ch = saved->mri_last_ch;
219
220  free (arg);
221}				/* app_pop() */
222
/* Return the character denoted by a backslash followed by CH.  The
   letters b, f, n, r and t yield the corresponding control character;
   any other CH, including the two quote characters, stands for
   itself.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  /* letters[i] is the escape letter whose value is values[i].  */
  static const char letters[] = "bfnrt";
  static const char values[] = "\b\f\n\r\t";
  int i;

  for (i = 0; letters[i] != '\0'; i++)
    {
      if (ch == letters[i])
	return values[i];
    }

  return ch;
}
249
250/* This function is called to process input characters.  The GET
251   parameter is used to retrieve more input characters.  GET should
252   set its parameter to point to a buffer, and return the length of
253   the buffer; it should return 0 at end of file.  The scrubbed output
254   characters are put into the buffer starting at TOSTART; the TOSTART
255   buffer is TOLEN bytes in length.  The function returns the number
256   of scrubbed characters put into TOSTART.  This will be TOLEN unless
257   end of file was seen.  This function is arranged as a state
258   machine, and saves its state so that it may return at any point.
259   This is the way the old code used to work.  */
260
261int
262do_scrub_chars (get, tostart, tolen)
263     int (*get) PARAMS ((char **));
264     char *tostart;
265     int tolen;
266{
267  char *to = tostart;
268  char *toend = tostart + tolen;
269  char *from;
270  char *fromend;
271  int fromlen;
272  register int ch, ch2 = 0;
273
274  /*State 0: beginning of normal line
275	  1: After first whitespace on line (flush more white)
276	  2: After first non-white (opcode) on line (keep 1white)
277	  3: after second white on line (into operands) (flush white)
278	  4: after putting out a .line, put out digits
279	  5: parsing a string, then go to old-state
280	  6: putting out \ escape in a "d string.
281	  7: After putting out a .appfile, put out string.
282	  8: After putting out a .appfile string, flush until newline.
283	  9: After seeing symbol char in state 3 (keep 1white after symchar)
284	 10: After seeing whitespace in state 9 (keep white before symchar)
285	 11: After seeing a symbol character in state 0 (eg a label definition)
286	 -1: output string in out_string and go to the state in old_state
287	 -2: flush text until a '*' '/' is seen, then go to state old_state
288	  */
289
290  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
291     constructs like ``.loc 1 20''.  This was turning into ``.loc
292     120''.  States 9 and 10 ensure that a space is never dropped in
293     between characters which could appear in a identifier.  Ian
294     Taylor, ian@cygnus.com.
295
296     I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
297     correctly on the PA (and any other target where colons are optional).
298     Jeff Law, law@cs.utah.edu.  */
299
300  /* This macro gets the next input character.  */
301
302#define GET()				\
303  (from < fromend			\
304   ? *from++				\
305   : ((saved_input != NULL		\
306       ? (free (saved_input),		\
307	  saved_input = NULL,		\
308	  0)				\
309       : 0),				\
310      fromlen = (*get) (&from),		\
311      fromend = from + fromlen,		\
312      (fromlen == 0			\
313       ? EOF				\
314       : *from++)))
315
316  /* This macro pushes a character back on the input stream.  */
317
318#define UNGET(uch) (*--from = (uch))
319
320  /* This macro puts a character into the output buffer.  If this
321     character fills the output buffer, this macro jumps to the label
322     TOFULL.  We use this rather ugly approach because we need to
323     handle two different termination conditions: EOF on the input
324     stream, and a full output buffer.  It would be simpler if we
325     always read in the entire input stream before processing it, but
326     I don't want to make such a significant change to the assembler's
327     memory usage.  */
328
329#define PUT(pch)			\
330  do					\
331    {					\
332      *to++ = (pch);			\
333      if (to >= toend)			\
334        goto tofull;			\
335    }					\
336  while (0)
337
338  if (saved_input != NULL)
339    {
340      from = saved_input;
341      fromend = from + saved_input_len;
342    }
343  else
344    {
345      fromlen = (*get) (&from);
346      if (fromlen == 0)
347	return 0;
348      fromend = from + fromlen;
349    }
350
351  while (1)
352    {
353      /* The cases in this switch end with continue, in order to
354         branch back to the top of this while loop and generate the
355         next output character in the appropriate state.  */
356      switch (state)
357	{
358	case -1:
359	  ch = *out_string++;
360	  if (*out_string == '\0')
361	    {
362	      state = old_state;
363	      old_state = 3;
364	    }
365	  PUT (ch);
366	  continue;
367
368	case -2:
369	  for (;;)
370	    {
371	      do
372		{
373		  ch = GET ();
374
375		  if (ch == EOF)
376		    {
377		      as_warn ("end of file in comment");
378		      goto fromeof;
379		    }
380
381		  if (ch == '\n')
382		    PUT ('\n');
383		}
384	      while (ch != '*');
385
386	      while ((ch = GET ()) == '*')
387		;
388
389	      if (ch == EOF)
390		{
391		  as_warn ("end of file in comment");
392		  goto fromeof;
393		}
394
395	      if (ch == '/')
396		break;
397
398	      UNGET (ch);
399	    }
400
401	  state = old_state;
402	  UNGET (' ');
403	  continue;
404
405	case 4:
406	  ch = GET ();
407	  if (ch == EOF)
408	    goto fromeof;
409	  else if (ch >= '0' && ch <= '9')
410	    PUT (ch);
411	  else
412	    {
413	      while (ch != EOF && IS_WHITESPACE (ch))
414		ch = GET ();
415	      if (ch == '"')
416		{
417		  UNGET (ch);
418		  if (scrub_m68k_mri)
419		    out_string = "\n\tappfile ";
420		  else
421		    out_string = "\n\t.appfile ";
422		  old_state = 7;
423		  state = -1;
424		  PUT (*out_string++);
425		}
426	      else
427		{
428		  while (ch != EOF && ch != '\n')
429		    ch = GET ();
430		  state = 0;
431		  PUT (ch);
432		}
433	    }
434	  continue;
435
436	case 5:
437	  /* We are going to copy everything up to a quote character,
438             with special handling for a backslash.  We try to
439             optimize the copying in the simple case without using the
440             GET and PUT macros.  */
441	  {
442	    char *s;
443	    int len;
444
445	    for (s = from; s < fromend; s++)
446	      {
447		ch = *s;
448		/* This condition must be changed if the type of any
449                   other character can be LEX_IS_STRINGQUOTE.  */
450		if (ch == '\\'
451		    || ch == '"'
452		    || ch == '\''
453		    || ch == '\n')
454		  break;
455	      }
456	    len = s - from;
457	    if (len > toend - to)
458	      len = toend - to;
459	    if (len > 0)
460	      {
461		memcpy (to, from, len);
462		to += len;
463		from += len;
464	      }
465	  }
466
467	  ch = GET ();
468	  if (ch == EOF)
469	    {
470	      as_warn ("end of file in string: inserted '\"'");
471	      state = old_state;
472	      UNGET ('\n');
473	      PUT ('"');
474	    }
475	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
476	    {
477	      state = old_state;
478	      PUT (ch);
479	    }
480#ifndef NO_STRING_ESCAPES
481	  else if (ch == '\\')
482	    {
483	      state = 6;
484	      PUT (ch);
485	    }
486#endif
487	  else if (scrub_m68k_mri && ch == '\n')
488	    {
489	      /* Just quietly terminate the string.  This permits lines like
490		   bne	label	loop if we haven't reach end yet
491		 */
492	      state = old_state;
493	      UNGET (ch);
494	      PUT ('\'');
495	    }
496	  else
497	    {
498	      PUT (ch);
499	    }
500	  continue;
501
502	case 6:
503	  state = 5;
504	  ch = GET ();
505	  switch (ch)
506	    {
507	      /* Handle strings broken across lines, by turning '\n' into
508		 '\\' and 'n'.  */
509	    case '\n':
510	      UNGET ('n');
511	      add_newlines++;
512	      PUT ('\\');
513	      continue;
514
515	    case '"':
516	    case '\\':
517	    case 'b':
518	    case 'f':
519	    case 'n':
520	    case 'r':
521	    case 't':
522	    case 'v':
523	    case 'x':
524	    case 'X':
525	    case '0':
526	    case '1':
527	    case '2':
528	    case '3':
529	    case '4':
530	    case '5':
531	    case '6':
532	    case '7':
533	      break;
534#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
535	    default:
536	      as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
537	      break;
538#else  /* ONLY_STANDARD_ESCAPES */
539	    default:
540	      /* Accept \x as x for any x */
541	      break;
542#endif /* ONLY_STANDARD_ESCAPES */
543
544	    case EOF:
545	      as_warn ("End of file in string: '\"' inserted");
546	      PUT ('"');
547	      continue;
548	    }
549	  PUT (ch);
550	  continue;
551
552	case 7:
553	  ch = GET ();
554	  state = 5;
555	  old_state = 8;
556	  if (ch == EOF)
557	    goto fromeof;
558	  PUT (ch);
559	  continue;
560
561	case 8:
562	  do
563	    ch = GET ();
564	  while (ch != '\n' && ch != EOF);
565	  if (ch == EOF)
566	    goto fromeof;
567	  state = 0;
568	  PUT (ch);
569	  continue;
570	}
571
572      /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
573
574      /* flushchar: */
575      ch = GET ();
576
577    recycle:
578
579#ifdef TC_M68K
580      /* We want to have pseudo-ops which control whether we are in
581         MRI mode or not.  Unfortunately, since m68k MRI mode affects
582         the scrubber, that means that we need a special purpose
583         recognizer here.  */
584      if (mri_state == NULL)
585	{
586	  if ((state == 0 || state == 1)
587	      && ch == mri_pseudo[0])
588	    mri_state = mri_pseudo + 1;
589	}
590      else
591	{
592	  /* We advance to the next state if we find the right
593	     character, or if we need a space character and we get any
594	     whitespace character, or if we need a '0' and we get a
595	     '1' (this is so that we only need one state to handle
596	     ``.mri 0'' and ``.mri 1'').  */
597	  if (ch != '\0'
598	      && (*mri_state == ch
599		  || (*mri_state == ' '
600		      && lex[ch] == LEX_IS_WHITESPACE)
601		  || (*mri_state == '0'
602		      && ch == '1')))
603	    {
604	      mri_last_ch = ch;
605	      ++mri_state;
606	    }
607	  else if (*mri_state != '\0'
608		   || (lex[ch] != LEX_IS_WHITESPACE
609		       && lex[ch] != LEX_IS_NEWLINE))
610	    {
611	      /* We did not get the expected character, or we didn't
612		 get a valid terminating character after seeing the
613		 entire pseudo-op, so we must go back to the
614		 beginning.  */
615	      mri_state = NULL;
616	    }
617	  else
618	    {
619	      /* We've read the entire pseudo-op.  mips_last_ch is
620                 either '0' or '1' indicating whether to enter or
621                 leave MRI mode.  */
622	      do_scrub_begin (mri_last_ch == '1');
623
624	      /* We continue handling the character as usual.  The
625                 main gas reader must also handle the .mri pseudo-op
626                 to control expression parsing and the like.  */
627	    }
628	}
629#endif
630
631      if (ch == EOF)
632	{
633	  if (state != 0)
634	    {
635	      as_warn ("end of file not at end of a line; newline inserted");
636	      state = 0;
637	      PUT ('\n');
638	    }
639	  goto fromeof;
640	}
641
642      switch (lex[ch])
643	{
644	case LEX_IS_WHITESPACE:
645	  do
646	    {
647	      ch = GET ();
648	    }
649	  while (ch != EOF && IS_WHITESPACE (ch));
650	  if (ch == EOF)
651	    goto fromeof;
652
653	  if (state == 0)
654	    {
655	      /* Preserve a single whitespace character at the
656		 beginning of a line.  */
657	      state = 1;
658	      UNGET (ch);
659	      PUT (' ');
660	      break;
661	    }
662
663	  if (IS_COMMENT (ch)
664	      || ch == '/'
665	      || IS_LINE_SEPARATOR (ch))
666	    {
667	      if (scrub_m68k_mri)
668		{
669		  /* In MRI mode, we keep these spaces.  */
670		  UNGET (ch);
671		  PUT (' ');
672		  break;
673		}
674	      goto recycle;
675	    }
676
677	  /* If we're in state 2 or 11, we've seen a non-white
678	     character followed by whitespace.  If the next character
679	     is ':', this is whitespace after a label name which we
680	     normally must ignore.  In MRI mode, though, spaces are
681	     not permitted between the label and the colon.  */
682	  if ((state == 2 || state == 11)
683	      && lex[ch] == LEX_IS_COLON
684	      && ! scrub_m68k_mri)
685	    {
686	      state = 1;
687	      PUT (ch);
688	      break;
689	    }
690
691	  switch (state)
692	    {
693	    case 0:
694	      state++;
695	      goto recycle;	/* Punted leading sp */
696	    case 1:
697	      /* We can arrive here if we leave a leading whitespace
698		 character at the beginning of a line.  */
699	      goto recycle;
700	    case 2:
701	      state = 3;
702	      if (to + 1 < toend)
703		{
704		  /* Optimize common case by skipping UNGET/GET.  */
705		  PUT (' ');	/* Sp after opco */
706		  goto recycle;
707		}
708	      UNGET (ch);
709	      PUT (' ');
710	      break;
711	    case 3:
712	      if (scrub_m68k_mri)
713		{
714		  /* In MRI mode, we keep these spaces.  */
715		  UNGET (ch);
716		  PUT (' ');
717		  break;
718		}
719	      goto recycle;	/* Sp in operands */
720	    case 9:
721	    case 10:
722	      if (scrub_m68k_mri)
723		{
724		  /* In MRI mode, we keep these spaces.  */
725		  state = 3;
726		  UNGET (ch);
727		  PUT (' ');
728		  break;
729		}
730	      state = 10;	/* Sp after symbol char */
731	      goto recycle;
732	    case 11:
733	      if (flag_m68k_mri
734#ifdef LABELS_WITHOUT_COLONS
735		  || 1
736#endif
737		  )
738		state = 1;
739	      else
740		{
741		  /* We know that ch is not ':', since we tested that
742                     case above.  Therefore this is not a label, so it
743                     must be the opcode, and we've just seen the
744                     whitespace after it.  */
745		  state = 3;
746		}
747	      UNGET (ch);
748	      PUT (' ');	/* Sp after label definition.  */
749	      break;
750	    default:
751	      BAD_CASE (state);
752	    }
753	  break;
754
755	case LEX_IS_TWOCHAR_COMMENT_1ST:
756	  ch2 = GET ();
757	  if (ch2 == '*')
758	    {
759	      for (;;)
760		{
761		  do
762		    {
763		      ch2 = GET ();
764		      if (ch2 != EOF && IS_NEWLINE (ch2))
765			add_newlines++;
766		    }
767		  while (ch2 != EOF && ch2 != '*');
768
769		  while (ch2 == '*')
770		    ch2 = GET ();
771
772		  if (ch2 == EOF || ch2 == '/')
773		    break;
774
775		  /* This UNGET will ensure that we count newlines
776                     correctly.  */
777		  UNGET (ch2);
778		}
779
780	      if (ch2 == EOF)
781		as_warn ("end of file in multiline comment");
782
783	      ch = ' ';
784	      goto recycle;
785	    }
786	  else
787	    {
788	      if (ch2 != EOF)
789		UNGET (ch2);
790	      if (state == 9 || state == 10)
791		state = 3;
792	      PUT (ch);
793	    }
794	  break;
795
796	case LEX_IS_STRINGQUOTE:
797	  if (state == 10)
798	    {
799	      /* Preserve the whitespace in foo "bar" */
800	      UNGET (ch);
801	      state = 3;
802	      PUT (' ');
803
804	      /* PUT didn't jump out.  We could just break, but we
805                 know what will happen, so optimize a bit.  */
806	      ch = GET ();
807	      old_state = 3;
808	    }
809	  else if (state == 9)
810	    old_state = 3;
811	  else
812	    old_state = state;
813	  state = 5;
814	  PUT (ch);
815	  break;
816
817#ifndef IEEE_STYLE
818	case LEX_IS_ONECHAR_QUOTE:
819	  if (state == 10)
820	    {
821	      /* Preserve the whitespace in foo 'b' */
822	      UNGET (ch);
823	      state = 3;
824	      PUT (' ');
825	      break;
826	    }
827	  ch = GET ();
828	  if (ch == EOF)
829	    {
830	      as_warn ("end of file after a one-character quote; \\0 inserted");
831	      ch = 0;
832	    }
833	  if (ch == '\\')
834	    {
835	      ch = GET ();
836	      if (ch == EOF)
837		{
838		  as_warn ("end of file in escape character");
839		  ch = '\\';
840		}
841	      else
842		ch = process_escape (ch);
843	    }
844	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
845
846	  /* None of these 'x constants for us.  We want 'x'.  */
847	  if ((ch = GET ()) != '\'')
848	    {
849#ifdef REQUIRE_CHAR_CLOSE_QUOTE
850	      as_warn ("Missing close quote: (assumed)");
851#else
852	      if (ch != EOF)
853		UNGET (ch);
854#endif
855	    }
856	  if (strlen (out_buf) == 1)
857	    {
858	      PUT (out_buf[0]);
859	      break;
860	    }
861	  if (state == 9)
862	    old_state = 3;
863	  else
864	    old_state = state;
865	  state = -1;
866	  out_string = out_buf;
867	  PUT (*out_string++);
868	  break;
869#endif
870
871	case LEX_IS_COLON:
872	  if (state == 9 || state == 10)
873	    state = 3;
874	  else if (state != 3)
875	    state = 1;
876	  PUT (ch);
877	  break;
878
879	case LEX_IS_NEWLINE:
880	  /* Roll out a bunch of newlines from inside comments, etc.  */
881	  if (add_newlines)
882	    {
883	      --add_newlines;
884	      UNGET (ch);
885	    }
886	  /* fall thru into... */
887
888	case LEX_IS_LINE_SEPARATOR:
889	  state = 0;
890	  PUT (ch);
891	  break;
892
893	case LEX_IS_LINE_COMMENT_START:
894	  /* FIXME-someday: The two character comment stuff was badly
895	     thought out.  On i386, we want '/' as line comment start
896	     AND we want C style comments.  hence this hack.  The
897	     whole lexical process should be reworked.  xoxorich.  */
898	  if (ch == '/')
899	    {
900	      ch2 = GET ();
901	      if (ch2 == '*')
902		{
903		  old_state = 3;
904		  state = -2;
905		  break;
906		}
907	      else
908		{
909		  UNGET (ch2);
910		}
911	    } /* bad hack */
912
913	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
914	    {
915	      int startch;
916
917	      startch = ch;
918
919	      do
920		{
921		  ch = GET ();
922		}
923	      while (ch != EOF && IS_WHITESPACE (ch));
924	      if (ch == EOF)
925		{
926		  as_warn ("end of file in comment; newline inserted");
927		  PUT ('\n');
928		  break;
929		}
930	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
931		{
932		  /* Not a cpp line.  */
933		  while (ch != EOF && !IS_NEWLINE (ch))
934		    ch = GET ();
935		  if (ch == EOF)
936		    as_warn ("EOF in Comment: Newline inserted");
937		  state = 0;
938		  PUT ('\n');
939		  break;
940		}
941	      /* Loks like `# 123 "filename"' from cpp.  */
942	      UNGET (ch);
943	      old_state = 4;
944	      state = -1;
945	      if (scrub_m68k_mri)
946		out_string = "\tappline ";
947	      else
948		out_string = "\t.appline ";
949	      PUT (*out_string++);
950	      break;
951	    }
952
953	  /* We have a line comment character which is not at the
954	     start of a line.  If this is also a normal comment
955	     character, fall through.  Otherwise treat it as a default
956	     character.  */
957	  if (strchr (tc_comment_chars, ch) == NULL
958	      && (! scrub_m68k_mri
959		  || (ch != '!' && ch != '*')))
960	    goto de_fault;
961	  if (scrub_m68k_mri
962	      && (ch == '!' || ch == '*' || ch == '#')
963	      && state != 1
964	      && state != 10)
965	    goto de_fault;
966	  /* Fall through.  */
967	case LEX_IS_COMMENT_START:
968	  do
969	    {
970	      ch = GET ();
971	    }
972	  while (ch != EOF && !IS_NEWLINE (ch));
973	  if (ch == EOF)
974	    as_warn ("end of file in comment; newline inserted");
975	  state = 0;
976	  PUT ('\n');
977	  break;
978
979	case LEX_IS_SYMBOL_COMPONENT:
980	  if (state == 10)
981	    {
982	      /* This is a symbol character following another symbol
983		 character, with whitespace in between.  We skipped
984		 the whitespace earlier, so output it now.  */
985	      UNGET (ch);
986	      state = 3;
987	      PUT (' ');
988	      break;
989	    }
990
991	  if (state == 3)
992	    state = 9;
993
994	  /* This is a common case.  Quickly copy CH and all the
995             following symbol component or normal characters.  */
996	  if (to + 1 < toend && mri_state == NULL)
997	    {
998	      char *s;
999	      int len;
1000
1001	      for (s = from; s < fromend; s++)
1002		{
1003		  int type;
1004
1005		  ch2 = *s;
1006		  type = lex[ch2];
1007		  if (type != 0
1008		      && type != LEX_IS_SYMBOL_COMPONENT)
1009		    break;
1010		}
1011	      if (s > from)
1012		{
1013		  /* Handle the last character normally, for
1014                     simplicity.  */
1015		  --s;
1016		}
1017	      len = s - from;
1018	      if (len > (toend - to) - 1)
1019		len = (toend - to) - 1;
1020	      if (len > 0)
1021		{
1022		  PUT (ch);
1023		  if (len > 8)
1024		    {
1025		      memcpy (to, from, len);
1026		      to += len;
1027		      from += len;
1028		    }
1029		  else
1030		    {
1031		      switch (len)
1032			{
1033			case 8: *to++ = *from++;
1034			case 7: *to++ = *from++;
1035			case 6: *to++ = *from++;
1036			case 5: *to++ = *from++;
1037			case 4: *to++ = *from++;
1038			case 3: *to++ = *from++;
1039			case 2: *to++ = *from++;
1040			case 1: *to++ = *from++;
1041			}
1042		    }
1043		  ch = GET ();
1044		}
1045	    }
1046
1047	  /* Fall through.  */
1048	default:
1049	de_fault:
1050	  /* Some relatively `normal' character.  */
1051	  if (state == 0)
1052	    {
1053	      state = 11;	/* Now seeing label definition */
1054	    }
1055	  else if (state == 1)
1056	    {
1057	      state = 2;	/* Ditto */
1058	    }
1059	  else if (state == 9)
1060	    {
1061	      if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1062		state = 3;
1063	    }
1064	  else if (state == 10)
1065	    {
1066	      state = 3;
1067	    }
1068	  PUT (ch);
1069	  break;
1070	}
1071    }
1072
1073  /*NOTREACHED*/
1074
1075 fromeof:
1076  /* We have reached the end of the input.  */
1077  return to - tostart;
1078
1079 tofull:
1080  /* The output buffer is full.  Save any input we have not yet
1081     processed.  */
1082  if (fromend > from)
1083    {
1084      char *save;
1085
1086      save = (char *) xmalloc (fromend - from);
1087      memcpy (save, from, fromend - from);
1088      if (saved_input != NULL)
1089	free (saved_input);
1090      saved_input = save;
1091      saved_input_len = fromend - from;
1092    }
1093  else
1094    {
1095      if (saved_input != NULL)
1096	{
1097	  free (saved_input);
1098	  saved_input = NULL;
1099	}
1100    }
1101  return to - tostart;
1102}
1103
1104/* end of app.c */
1105