lexi.c revision 69795
1112158Sdas/*
2112158Sdas * Copyright (c) 1985 Sun Microsystems, Inc.
3112158Sdas * Copyright (c) 1980, 1993
4112158Sdas *	The Regents of the University of California.  All rights reserved.
5112158Sdas * All rights reserved.
6112158Sdas *
7112158Sdas * Redistribution and use in source and binary forms, with or without
8112158Sdas * modification, are permitted provided that the following conditions
9112158Sdas * are met:
10112158Sdas * 1. Redistributions of source code must retain the above copyright
11112158Sdas *    notice, this list of conditions and the following disclaimer.
12112158Sdas * 2. Redistributions in binary form must reproduce the above copyright
13112158Sdas *    notice, this list of conditions and the following disclaimer in the
14112158Sdas *    documentation and/or other materials provided with the distribution.
15112158Sdas * 3. All advertising materials mentioning features or use of this software
16112158Sdas *    must display the following acknowledgement:
17112158Sdas *	This product includes software developed by the University of
18112158Sdas *	California, Berkeley and its contributors.
19112158Sdas * 4. Neither the name of the University nor the names of its contributors
20112158Sdas *    may be used to endorse or promote products derived from this software
21112158Sdas *    without specific prior written permission.
22112158Sdas *
23112158Sdas * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24112158Sdas * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25112158Sdas * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26112158Sdas * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27112158Sdas * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28112158Sdas * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29165743Sdas * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30165743Sdas * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31112158Sdas * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32174679Sdas * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33174679Sdas * SUCH DAMAGE.
34112158Sdas */
35165743Sdas
36165743Sdas#ifndef lint
37165743Sdasstatic char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
38112158Sdasstatic char rcsid[] = "@(#)$FreeBSD: head/usr.bin/indent/lexi.c 69795 2000-12-09 09:45:09Z obrien $";
39112158Sdas#endif /* not lint */
40112158Sdas
/*
 * Here we have the token scanner for indent.  It scans off one token and puts
 * it in the global variable "token".  It returns a code, indicating the type
 * of token scanned.
 */
46112158Sdas
47182709Sdas#include <stdio.h>
48112158Sdas#include <ctype.h>
49112158Sdas#include <stdlib.h>
50182709Sdas#include <string.h>
51112158Sdas#include "indent_globs.h"
52112158Sdas#include "indent_codes.h"
53112158Sdas
54112158Sdas#define alphanum 1
55112158Sdas#define opchar 3
56112158Sdas
/*
 * One entry of the reserved-word table searched by lexi().
 *
 * rwcode selects how lexi() treats the word:
 *   1 = switch, 2 = case/default, 3 = struct/union/enum,
 *   4 = declaration keyword, 5 = if/while/for, 6 = do/else,
 *   7 = sizeof; any other value is returned as a plain identifier.
 */
struct templ {
    char       *rwd;		/* keyword text; a null pointer terminates
				 * the table */
    int         rwcode;		/* keyword class, see above */
};
61112158Sdas
62219557Sdasstruct templ specials[1000] =
63219557Sdas{
64219557Sdas    "switch", 1,
65219557Sdas    "case", 2,
66219557Sdas    "break", 0,
67219557Sdas    "struct", 3,
68219557Sdas    "union", 3,
69219557Sdas    "enum", 3,
70219557Sdas    "default", 2,
71219557Sdas    "int", 4,
72219557Sdas    "char", 4,
73219557Sdas    "float", 4,
74219557Sdas    "double", 4,
75219557Sdas    "long", 4,
76219557Sdas    "short", 4,
77219557Sdas    "typdef", 4,
78219557Sdas    "unsigned", 4,
79219557Sdas    "register", 4,
80219557Sdas    "static", 4,
81219557Sdas    "global", 4,
82219557Sdas    "extern", 4,
83219557Sdas    "void", 4,
84112158Sdas    "goto", 0,
85227753Stheraven    "return", 0,
86112158Sdas    "if", 5,
87227753Stheraven    "while", 5,
88112158Sdas    "for", 5,
89227753Stheraven    "else", 6,
90112158Sdas    "do", 6,
91112158Sdas    "sizeof", 7,
92112158Sdas    "const", 9,
93112158Sdas    "volatile", 9,
94112158Sdas    0, 0
95165743Sdas};
96112158Sdas
/*
 * Character classification table, indexed by 7-bit character code.
 * Each entry is 1 for characters that may appear in an identifier or
 * number, 3 for operator characters, and 0 for everything else.
 * The table covers codes 0..127 only.
 */
char        chartype[128] =
{
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0f */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1f */
    0, 3, 0, 0, 1, 3, 3, 0,	/* space ! " # $ % & ' */
    0, 0, 3, 3, 0, 3, 0, 3,	/* ( ) * + , - . / */
    1, 1, 1, 1, 1, 1, 1, 1,	/* 0 - 7 */
    1, 1, 0, 0, 3, 3, 3, 3,	/* 8 9 : ; < = > ? */
    0, 1, 1, 1, 1, 1, 1, 1,	/* @ A - G */
    1, 1, 1, 1, 1, 1, 1, 1,	/* H - O */
    1, 1, 1, 1, 1, 1, 1, 1,	/* P - W */
    1, 1, 1, 0, 0, 0, 3, 1,	/* X Y Z [ \ ] ^ _ */
    0, 1, 1, 1, 1, 1, 1, 1,	/* ` a - g */
    1, 1, 1, 1, 1, 1, 1, 1,	/* h - o */
    1, 1, 1, 1, 1, 1, 1, 1,	/* p - w */
    1, 1, 1, 0, 3, 0, 3, 0	/* x y z { | } ~ DEL */
};
118227753Stheraven
119219557Sdas
120187808Sdas
121187808Sdas
122187808Sdasint
123187808Sdaslexi()
124187808Sdas{
125187808Sdas    int         unary_delim;	/* this is set to 1 if the current token
126187808Sdas				 *
127187808Sdas				 * forces a following operator to be unary */
128187808Sdas    static int  last_code;	/* the last token type returned */
129187808Sdas    static int  l_struct;	/* set to 1 if the last token was 'struct' */
130187808Sdas    int         code;		/* internal code to be returned */
131182709Sdas    char        qchar;		/* the delimiter character for a string */
132182709Sdas
133182709Sdas    e_token = s_token;		/* point to start of place to save token */
134182709Sdas    unary_delim = false;
135182709Sdas    ps.col_1 = ps.last_nl;	/* tell world that this token started in
136182709Sdas				 * column 1 iff the last thing scanned was nl */
137182709Sdas    ps.last_nl = false;
138182709Sdas
139182709Sdas    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
140182709Sdas	ps.col_1 = false;	/* leading blanks imply token is not in column
141182709Sdas				 * 1 */
142182709Sdas	if (++buf_ptr >= buf_end)
143182709Sdas	    fill_buffer();
144112158Sdas    }
145165743Sdas
146219557Sdas    /* Scan an alphanumeric token */
147112158Sdas    if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
148112158Sdas	/*
149112158Sdas	 * we have a character or number
150112158Sdas	 */
151112158Sdas	register char *j;	/* used for searching thru list of
152112158Sdas				 *
153112158Sdas				 * reserved words */
154112158Sdas	register struct templ *p;
155112158Sdas
156112158Sdas	if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
157112158Sdas	    int         seendot = 0,
158112158Sdas	                seenexp = 0,
159112158Sdas			seensfx = 0;
160112158Sdas	    if (*buf_ptr == '0' &&
161112158Sdas		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
162112158Sdas		*e_token++ = *buf_ptr++;
163112158Sdas		*e_token++ = *buf_ptr++;
164112158Sdas		while (isxdigit(*buf_ptr)) {
165112158Sdas		    CHECK_SIZE_TOKEN;
166112158Sdas		    *e_token++ = *buf_ptr++;
167112158Sdas		}
168112158Sdas	    }
169187808Sdas	    else
170112158Sdas		while (1) {
171112158Sdas		    if (*buf_ptr == '.')
172112158Sdas			if (seendot)
173112158Sdas			    break;
174112158Sdas			else
175112158Sdas			    seendot++;
176112158Sdas		    CHECK_SIZE_TOKEN;
177165743Sdas		    *e_token++ = *buf_ptr++;
178219557Sdas		    if (!isdigit(*buf_ptr) && *buf_ptr != '.')
179165743Sdas			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
180182709Sdas			    break;
181219557Sdas			else {
182165743Sdas			    seenexp++;
183219557Sdas			    seendot++;
184165743Sdas			    CHECK_SIZE_TOKEN;
185112158Sdas			    *e_token++ = *buf_ptr++;
186112158Sdas			    if (*buf_ptr == '+' || *buf_ptr == '-')
187112158Sdas				*e_token++ = *buf_ptr++;
188112158Sdas			}
189112158Sdas		}
190112158Sdas	    while (1) {
191124703Sdas		if (!(seensfx & 1) &&
192124703Sdas			(*buf_ptr == 'U' || *buf_ptr == 'u')) {
193124703Sdas		    CHECK_SIZE_TOKEN;
194124703Sdas		    *e_token++ = *buf_ptr++;
195112158Sdas		    seensfx |= 1;
196165743Sdas		    continue;
197112158Sdas		}
198112158Sdas        	if (!(seensfx & 2) &&
199112158Sdas			(*buf_ptr == 'L' || *buf_ptr == 'l')) {
200187808Sdas		    CHECK_SIZE_TOKEN;
201112158Sdas		    if (buf_ptr[1] == buf_ptr[0])
202112158Sdas		        *e_token++ = *buf_ptr++;
203112158Sdas		    *e_token++ = *buf_ptr++;
204112158Sdas		    seensfx |= 2;
205112158Sdas		    continue;
206112158Sdas		}
207112158Sdas		break;
208112158Sdas	    }
209112158Sdas	}
210112158Sdas	else
211112158Sdas	    while (chartype[*buf_ptr] == alphanum) {	/* copy it over */
212112158Sdas		CHECK_SIZE_TOKEN;
213112158Sdas		*e_token++ = *buf_ptr++;
214112158Sdas		if (buf_ptr >= buf_end)
215187808Sdas		    fill_buffer();
216187808Sdas	    }
217187808Sdas	*e_token++ = '\0';
218187808Sdas	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
219187808Sdas	    if (++buf_ptr >= buf_end)
220187808Sdas		fill_buffer();
221112415Sdas	}
222187808Sdas	ps.its_a_keyword = false;
223187808Sdas	ps.sizeof_keyword = false;
224112158Sdas	if (l_struct) {		/* if last token was 'struct', then this token
225165743Sdas				 * should be treated as a declaration */
226112158Sdas	    l_struct = false;
227112158Sdas	    last_code = ident;
228112158Sdas	    ps.last_u_d = true;
229112158Sdas	    return (decl);
230112158Sdas	}
231112158Sdas	ps.last_u_d = false;	/* Operator after indentifier is binary */
232112158Sdas	last_code = ident;	/* Remember that this is the code we will
233112158Sdas				 * return */
234112158Sdas
235112158Sdas	/*
236112158Sdas	 * This loop will check if the token is a keyword.
237112158Sdas	 */
238112158Sdas	for (p = specials; (j = p->rwd) != 0; p++) {
239112158Sdas	    register char *p = s_token;	/* point at scanned token */
240112158Sdas	    if (*j++ != *p++ || *j++ != *p++)
241112158Sdas		continue;	/* This test depends on the fact that
242112158Sdas				 * identifiers are always at least 1 character
243112158Sdas				 * long (ie. the first two bytes of the
244112158Sdas				 * identifier are always meaningful) */
245112158Sdas	    if (p[-1] == 0)
246112158Sdas		break;		/* If its a one-character identifier */
247112158Sdas	    while (*p++ == *j)
248112158Sdas		if (*j++ == 0)
249112158Sdas		    goto found_keyword;	/* I wish that C had a multi-level
250112158Sdas					 * break... */
251112158Sdas	}
252112158Sdas	if (p->rwd) {		/* we have a keyword */
253112158Sdas    found_keyword:
254187808Sdas	    ps.its_a_keyword = true;
255112158Sdas	    ps.last_u_d = true;
256112158Sdas	    switch (p->rwcode) {
257112158Sdas	    case 1:		/* it is a switch */
258112158Sdas		return (swstmt);
259112158Sdas	    case 2:		/* a case or default */
260112158Sdas		return (casestmt);
261112158Sdas
262112158Sdas	    case 3:		/* a "struct" */
263112158Sdas		/*
264112158Sdas		 * Next time around, we may want to know that we have had a
265112158Sdas		 * 'struct'
266112158Sdas		 */
267112158Sdas		l_struct = true;
268112158Sdas
269112158Sdas		/*
270112158Sdas		 * Fall through to test for a cast, function prototype or
271112158Sdas		 * sizeof().
272112158Sdas		 */
273112158Sdas	    case 4:		/* one of the declaration keywords */
274112158Sdas		if (ps.p_l_follow) {
275112158Sdas		    ps.cast_mask |= 1 << ps.p_l_follow;
276112158Sdas
277112158Sdas		    /*
278112158Sdas		     * Forget that we saw `struct' if we're in a sizeof().
279112158Sdas		     */
280112158Sdas		    if (ps.sizeof_mask)
281112158Sdas			l_struct = false;
282112158Sdas
283112158Sdas		    break;	/* inside parens: cast, prototype or sizeof() */
284112158Sdas		}
285112158Sdas		last_code = decl;
286112158Sdas		return (decl);
287112158Sdas
288112158Sdas	    case 5:		/* if, while, for */
289112158Sdas		return (sp_paren);
290112158Sdas
291112158Sdas	    case 6:		/* do, else */
292112158Sdas		return (sp_nparen);
293112158Sdas
294112158Sdas	    case 7:
295112158Sdas		ps.sizeof_keyword = true;
296112158Sdas	    default:		/* all others are treated like any other
297112158Sdas				 * identifier */
298112158Sdas		return (ident);
299112158Sdas	    }			/* end of switch */
300165743Sdas	}			/* end of if (found_it) */
301165743Sdas	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
302112158Sdas	    register char *tp = buf_ptr;
303112158Sdas	    while (tp < buf_end)
304112158Sdas		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
305112158Sdas		    goto not_proc;
306112158Sdas	    strncpy(ps.procname, token, sizeof ps.procname - 1);
307112158Sdas	    ps.in_parameter_declaration = 1;
308219557Sdas	    rparen_count = 1;
309219557Sdas    not_proc:;
310112158Sdas	}
311112158Sdas	/*
312112158Sdas	 * The following hack attempts to guess whether or not the current
313112158Sdas	 * token is in fact a declaration keyword -- one that has been
314112158Sdas	 * typedefd
315112158Sdas	 */
316112158Sdas	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
317112158Sdas		&& !ps.p_l_follow
318112158Sdas	        && !ps.block_init
319112158Sdas		&& (ps.last_token == rparen || ps.last_token == semicolon ||
320219557Sdas		    ps.last_token == decl ||
321219557Sdas		    ps.last_token == lbrace || ps.last_token == rbrace)) {
322112158Sdas	    ps.its_a_keyword = true;
323112158Sdas	    ps.last_u_d = true;
324165743Sdas	    last_code = decl;
325219557Sdas	    return decl;
326219557Sdas	}
327165743Sdas	if (last_code == decl)	/* if this is a declared variable, then
328112158Sdas				 * following sign is unary */
329112158Sdas	    ps.last_u_d = true;	/* will make "int a -1" work */
330112158Sdas	last_code = ident;
331112158Sdas	return (ident);		/* the ident is not in the list */
332112158Sdas    }				/* end of procesing for alpanum character */
333112158Sdas
334112158Sdas    /* Scan a non-alphanumeric token */
335112158Sdas
336112158Sdas    *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
337112158Sdas				 * moved here */
338112158Sdas    *e_token = '\0';
339112158Sdas    if (++buf_ptr >= buf_end)
340112158Sdas	fill_buffer();
341112158Sdas
342112158Sdas    switch (*token) {
343112158Sdas    case '\n':
344112158Sdas	unary_delim = ps.last_u_d;
345112158Sdas	ps.last_nl = true;	/* remember that we just had a newline */
346112158Sdas	code = (had_eof ? 0 : newline);
347112158Sdas
348112158Sdas	/*
349112158Sdas	 * if data has been exausted, the newline is a dummy, and we should
350219557Sdas	 * return code to stop
351112158Sdas	 */
352112158Sdas	break;
353112158Sdas
354112158Sdas    case '\'':			/* start of quoted character */
355112158Sdas    case '"':			/* start of string */
356219557Sdas	qchar = *token;
357112158Sdas	if (troff) {
358112158Sdas	    e_token[-1] = '`';
359112158Sdas	    if (qchar == '"')
360112158Sdas		*e_token++ = '`';
361112158Sdas	    e_token = chfont(&bodyf, &stringf, e_token);
362112158Sdas	}
363112158Sdas	do {			/* copy the string */
364112158Sdas	    while (1) {		/* move one character or [/<char>]<char> */
365112158Sdas		if (*buf_ptr == '\n') {
366112158Sdas		    printf("%d: Unterminated literal\n", line_no);
367112158Sdas		    goto stop_lit;
368219557Sdas		}
369112158Sdas		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
370112158Sdas					 * since CHECK_SIZE guarantees that there
371112158Sdas					 * are at least 5 entries left */
372112158Sdas		*e_token = *buf_ptr++;
373112158Sdas		if (buf_ptr >= buf_end)
374112158Sdas		    fill_buffer();
375112158Sdas		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
376112158Sdas		    if (*buf_ptr == '\n')	/* check for escaped newline */
377219557Sdas			++line_no;
378112158Sdas		    if (troff) {
379112158Sdas			*++e_token = BACKSLASH;
380112158Sdas			if (*buf_ptr == BACKSLASH)
381219557Sdas			    *++e_token = BACKSLASH;
382112158Sdas		    }
383112158Sdas		    *++e_token = *buf_ptr++;
384112158Sdas		    ++e_token;	/* we must increment this again because we
385112158Sdas				 * copied two chars */
386112158Sdas		    if (buf_ptr >= buf_end)
387112158Sdas			fill_buffer();
388112158Sdas		}
389112158Sdas		else
390112158Sdas		    break;	/* we copied one character */
391112158Sdas	    }			/* end of while (1) */
392112158Sdas	} while (*e_token++ != qchar);
393219557Sdas	if (troff) {
394112158Sdas	    e_token = chfont(&stringf, &bodyf, e_token - 1);
395112158Sdas	    if (qchar == '"')
396112158Sdas		*e_token++ = '\'';
397112158Sdas	}
398219557Sdasstop_lit:
399112158Sdas	code = ident;
400112158Sdas	break;
401112158Sdas
402112158Sdas    case ('('):
403112158Sdas    case ('['):
404219557Sdas	unary_delim = true;
405219557Sdas	code = lparen;
406219557Sdas	break;
407112158Sdas
408112158Sdas    case (')'):
409219557Sdas    case (']'):
410112158Sdas	code = rparen;
411219557Sdas	break;
412112158Sdas
413112158Sdas    case '#':
414112158Sdas	unary_delim = ps.last_u_d;
415112158Sdas	code = preesc;
416112158Sdas	break;
417112158Sdas
418112158Sdas    case '?':
419112158Sdas	unary_delim = true;
420112158Sdas	code = question;
421219557Sdas	break;
422112158Sdas
423112158Sdas    case (':'):
424112158Sdas	code = colon;
425219557Sdas	unary_delim = true;
426112158Sdas	break;
427112158Sdas
428112158Sdas    case (';'):
429219557Sdas	unary_delim = true;
430112158Sdas	code = semicolon;
431112158Sdas	break;
432112158Sdas
433112158Sdas    case ('{'):
434112158Sdas	unary_delim = true;
435112158Sdas
436112158Sdas	/*
437112158Sdas	 * if (ps.in_or_st) ps.block_init = 1;
438112158Sdas	 */
439112158Sdas	/* ?	code = ps.block_init ? lparen : lbrace; */
440112158Sdas	code = lbrace;
441112158Sdas	break;
442112158Sdas
443182709Sdas    case ('}'):
444112158Sdas	unary_delim = true;
445182709Sdas	/* ?	code = ps.block_init ? rparen : rbrace; */
446112158Sdas	code = rbrace;
447182709Sdas	break;
448182709Sdas
449112158Sdas    case 014:			/* a form feed */
450112158Sdas	unary_delim = ps.last_u_d;
451112158Sdas	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
452112158Sdas				 * right */
453112158Sdas	code = form_feed;
454112158Sdas	break;
455112158Sdas
456112158Sdas    case (','):
457219557Sdas	unary_delim = true;
458112158Sdas	code = comma;
459112158Sdas	break;
460112158Sdas
461112158Sdas    case '.':
462112158Sdas	unary_delim = false;
463112158Sdas	code = period;
464182709Sdas	break;
465112158Sdas
466112158Sdas    case '-':
467219557Sdas    case '+':			/* check for -, +, --, ++ */
468219557Sdas	code = (ps.last_u_d ? unary_op : binary_op);
469112158Sdas	unary_delim = true;
470112158Sdas
471219557Sdas	if (*buf_ptr == token[0]) {
472219557Sdas	    /* check for doubled character */
473112158Sdas	    *e_token++ = *buf_ptr++;
474112158Sdas	    /* buffer overflow will be checked at end of loop */
475219557Sdas	    if (last_code == ident || last_code == rparen) {
476219557Sdas		code = (ps.last_u_d ? unary_op : postop);
477112158Sdas		/* check for following ++ or -- */
478112158Sdas		unary_delim = false;
479112158Sdas	    }
480219557Sdas	}
481219557Sdas	else if (*buf_ptr == '=')
482112158Sdas	    /* check for operator += */
483112158Sdas	    *e_token++ = *buf_ptr++;
484219557Sdas	else if (*buf_ptr == '>') {
485219557Sdas	    /* check for operator -> */
486112158Sdas	    *e_token++ = *buf_ptr++;
487219557Sdas	    if (!pointer_as_binop) {
488219557Sdas		unary_delim = false;
489219557Sdas		code = unary_op;
490219557Sdas		ps.want_blank = false;
491219557Sdas	    }
492219557Sdas	}
493219557Sdas	break;			/* buffer overflow will be checked at end of
494219557Sdas				 * switch */
495219557Sdas
496219557Sdas    case '=':
497219557Sdas	if (ps.in_or_st)
498112158Sdas	    ps.block_init = 1;
499112158Sdas#ifdef undef
500112158Sdas	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
501112158Sdas	    e_token[-1] = *buf_ptr++;
502112158Sdas	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
503219557Sdas		*e_token++ = *buf_ptr++;
504112158Sdas	    *e_token++ = '=';	/* Flip =+ to += */
505219557Sdas	    *e_token = 0;
506219557Sdas	}
507219557Sdas#else
508112158Sdas	if (*buf_ptr == '=') {/* == */
509112158Sdas	    *e_token++ = '=';	/* Flip =+ to += */
510112158Sdas	    buf_ptr++;
511112158Sdas	    *e_token = 0;
512112158Sdas	}
513219557Sdas#endif
514219557Sdas	code = binary_op;
515112158Sdas	unary_delim = true;
516112158Sdas	break;
517219557Sdas	/* can drop thru!!! */
518112158Sdas
519112158Sdas    case '>':
520112158Sdas    case '<':
521112158Sdas    case '!':			/* ops like <, <<, <=, !=, etc */
522112158Sdas	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
523219557Sdas	    *e_token++ = *buf_ptr;
524112158Sdas	    if (++buf_ptr >= buf_end)
525112158Sdas		fill_buffer();
526112158Sdas	}
527112158Sdas	if (*buf_ptr == '=')
528112158Sdas	    *e_token++ = *buf_ptr++;
529112158Sdas	code = (ps.last_u_d ? unary_op : binary_op);
530112158Sdas	unary_delim = true;
531112158Sdas	break;
532219557Sdas
533219557Sdas    default:
534112158Sdas	if (token[0] == '/' && *buf_ptr == '*') {
535112158Sdas	    /* it is start of comment */
536112158Sdas	    *e_token++ = '*';
537219557Sdas
538112158Sdas	    if (++buf_ptr >= buf_end)
539219557Sdas		fill_buffer();
540112158Sdas
541219557Sdas	    code = comment;
542112158Sdas	    unary_delim = ps.last_u_d;
543112158Sdas	    break;
544219557Sdas	}
545112158Sdas	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
546112158Sdas	    /*
547112158Sdas	     * handle ||, &&, etc, and also things as in int *****i
548112158Sdas	     */
549219557Sdas	    *e_token++ = *buf_ptr;
550112158Sdas	    if (++buf_ptr >= buf_end)
551219557Sdas		fill_buffer();
552219557Sdas	}
553219557Sdas	code = (ps.last_u_d ? unary_op : binary_op);
554219557Sdas	unary_delim = true;
555219557Sdas
556112158Sdas
557219557Sdas    }				/* end of switch */
558112158Sdas    if (code != newline) {
559219557Sdas	l_struct = false;
560219557Sdas	last_code = code;
561112158Sdas    }
562112158Sdas    if (buf_ptr >= buf_end)	/* check for input buffer empty */
563219557Sdas	fill_buffer();
564219557Sdas    ps.last_u_d = unary_delim;
565112158Sdas    *e_token = '\0';		/* null terminate the token */
566112158Sdas    return (code);
567112158Sdas}
568112158Sdas
569112158Sdas/*
570112158Sdas * Add the given keyword to the keyword table, using val as the keyword type
571112158Sdas */
572112158Sdasaddkey(key, val)
573112158Sdas    char       *key;
574112158Sdas{
575112158Sdas    register struct templ *p = specials;
576112158Sdas    while (p->rwd)
577187808Sdas	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
578112158Sdas	    return;
579112158Sdas	else
580112158Sdas	    p++;
581112158Sdas    if (p >= specials + sizeof specials / sizeof specials[0])
582219557Sdas	return;			/* For now, table overflows are silently
583112158Sdas				 * ignored */
584112158Sdas    p->rwd = key;
585112158Sdas    p->rwcode = val;
586112158Sdas    p[1].rwd = 0;
587112158Sdas    p[1].rwcode = 0;
588112158Sdas    return;
589112158Sdas}
590112158Sdas