lexi.c revision 98771
11590Srgrimes/*
21590Srgrimes * Copyright (c) 1985 Sun Microsystems, Inc.
31590Srgrimes * Copyright (c) 1980, 1993
41590Srgrimes *	The Regents of the University of California.  All rights reserved.
51590Srgrimes * All rights reserved.
61590Srgrimes *
71590Srgrimes * Redistribution and use in source and binary forms, with or without
81590Srgrimes * modification, are permitted provided that the following conditions
91590Srgrimes * are met:
101590Srgrimes * 1. Redistributions of source code must retain the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer.
121590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
131590Srgrimes *    notice, this list of conditions and the following disclaimer in the
141590Srgrimes *    documentation and/or other materials provided with the distribution.
151590Srgrimes * 3. All advertising materials mentioning features or use of this software
161590Srgrimes *    must display the following acknowledgement:
171590Srgrimes *	This product includes software developed by the University of
181590Srgrimes *	California, Berkeley and its contributors.
191590Srgrimes * 4. Neither the name of the University nor the names of its contributors
201590Srgrimes *    may be used to endorse or promote products derived from this software
211590Srgrimes *    without specific prior written permission.
221590Srgrimes *
231590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
241590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
251590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
261590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
271590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
281590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
291590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
301590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
311590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
321590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
331590Srgrimes * SUCH DAMAGE.
341590Srgrimes */
351590Srgrimes
3685632Sschweikh#if 0
371590Srgrimes#ifndef lint
381590Srgrimesstatic char sccsid[] = "@(#)lexi.c	8.1 (Berkeley) 6/6/93";
3969796Sobrienstatic const char rcsid[] =
4070342Sobrien  "$FreeBSD: head/usr.bin/indent/lexi.c 98771 2002-06-24 17:40:27Z jmallett $";
411590Srgrimes#endif /* not lint */
4285632Sschweikh#endif
431590Srgrimes
441590Srgrimes/*
451590Srgrimes * Here we have the token scanner for indent.  It scans off one token and puts
461590Srgrimes * it in the global variable "token".  It returns a code, indicating the type
471590Srgrimes * of token scanned.
481590Srgrimes */
491590Srgrimes
501590Srgrimes#include <stdio.h>
511590Srgrimes#include <ctype.h>
521590Srgrimes#include <stdlib.h>
531590Srgrimes#include <string.h>
541590Srgrimes#include "indent_globs.h"
551590Srgrimes#include "indent_codes.h"
5693440Sdwmalone#include "indent.h"
571590Srgrimes
581590Srgrimes#define alphanum 1
591590Srgrimes#define opchar 3
601590Srgrimes
6185632Sschweikhvoid fill_buffer(void);
6285632Sschweikh
/*
 * One entry of the reserved word table: the spelling of the word and the
 * class code that lexi() switches on once a scanned token matches it.
 */
struct templ {
    const char *rwd;		/* keyword text; NULL marks end of table */
    int         rwcode;		/* keyword class, see below */
};

/*
 * Reserved word table, terminated by an entry whose rwd is NULL.  addkey()
 * appends further entries at run time, which is why the array is much
 * larger than the initial list.
 *
 * Keyword classes as interpreted by lexi():
 *   1  switch
 *   2  case, default
 *   3  struct, union, enum
 *   4  declaration keywords
 *   5  if, while, for
 *   6  else, do
 *   7  sizeof
 *   anything else (0, 9, ...) is returned as a plain identifier
 */
struct templ specials[1000] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {"const", 9},
    {"volatile", 9},
    {NULL, 0}
};
1021590Srgrimes
/*
 * Character class lookup for the 128 ASCII codes, used by the scanner to
 * decide what kind of token a character starts or continues:
 *   0  neither of the classes below
 *   1  alphanumeric (letters, digits, '_' and '$')
 *   3  operator character
 */
char        chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space ! " # $ % & ' ( ) * + , - . / */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: @ A B C D E F G H I J K L M N O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: P Q R S T U V W X Y Z [ backslash ] ^ _ */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: ` a b c d e f g h i j k l m n o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: p q r s t u v w x y z { | } ~ DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
1241590Srgrimes
1251590Srgrimesint
12685632Sschweikhlexi(void)
1271590Srgrimes{
1281590Srgrimes    int         unary_delim;	/* this is set to 1 if the current token
1291590Srgrimes				 * forces a following operator to be unary */
1301590Srgrimes    static int  last_code;	/* the last token type returned */
1311590Srgrimes    static int  l_struct;	/* set to 1 if the last token was 'struct' */
1321590Srgrimes    int         code;		/* internal code to be returned */
1331590Srgrimes    char        qchar;		/* the delimiter character for a string */
1341590Srgrimes
1351590Srgrimes    e_token = s_token;		/* point to start of place to save token */
1361590Srgrimes    unary_delim = false;
1371590Srgrimes    ps.col_1 = ps.last_nl;	/* tell world that this token started in
1381590Srgrimes				 * column 1 iff the last thing scanned was nl */
1391590Srgrimes    ps.last_nl = false;
1401590Srgrimes
1411590Srgrimes    while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
1421590Srgrimes	ps.col_1 = false;	/* leading blanks imply token is not in column
1431590Srgrimes				 * 1 */
1441590Srgrimes	if (++buf_ptr >= buf_end)
1451590Srgrimes	    fill_buffer();
1461590Srgrimes    }
1471590Srgrimes
1481590Srgrimes    /* Scan an alphanumeric token */
14985632Sschweikh    if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1501590Srgrimes	/*
1511590Srgrimes	 * we have a character or number
1521590Srgrimes	 */
15393440Sdwmalone	const char *j;		/* used for searching thru list of
1548874Srgrimes				 *
1551590Srgrimes				 * reserved words */
15698771Sjmallett	struct templ *p;
1571590Srgrimes
15885632Sschweikh	if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
1591590Srgrimes	    int         seendot = 0,
16036211Srnordier	                seenexp = 0,
16136211Srnordier			seensfx = 0;
1621590Srgrimes	    if (*buf_ptr == '0' &&
1631590Srgrimes		    (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
1641590Srgrimes		*e_token++ = *buf_ptr++;
1651590Srgrimes		*e_token++ = *buf_ptr++;
1661590Srgrimes		while (isxdigit(*buf_ptr)) {
1671590Srgrimes		    CHECK_SIZE_TOKEN;
1681590Srgrimes		    *e_token++ = *buf_ptr++;
1691590Srgrimes		}
1701590Srgrimes	    }
1711590Srgrimes	    else
1721590Srgrimes		while (1) {
17385632Sschweikh		    if (*buf_ptr == '.') {
1741590Srgrimes			if (seendot)
1751590Srgrimes			    break;
1761590Srgrimes			else
1771590Srgrimes			    seendot++;
17885632Sschweikh		    }
1791590Srgrimes		    CHECK_SIZE_TOKEN;
1801590Srgrimes		    *e_token++ = *buf_ptr++;
18185632Sschweikh		    if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
1821590Srgrimes			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
1831590Srgrimes			    break;
1841590Srgrimes			else {
1851590Srgrimes			    seenexp++;
1861590Srgrimes			    seendot++;
1871590Srgrimes			    CHECK_SIZE_TOKEN;
1881590Srgrimes			    *e_token++ = *buf_ptr++;
1891590Srgrimes			    if (*buf_ptr == '+' || *buf_ptr == '-')
1901590Srgrimes				*e_token++ = *buf_ptr++;
1911590Srgrimes			}
19285632Sschweikh		    }
1931590Srgrimes		}
19436211Srnordier	    while (1) {
19536211Srnordier		if (!(seensfx & 1) &&
19636211Srnordier			(*buf_ptr == 'U' || *buf_ptr == 'u')) {
19736211Srnordier		    CHECK_SIZE_TOKEN;
19836211Srnordier		    *e_token++ = *buf_ptr++;
19936211Srnordier		    seensfx |= 1;
20036211Srnordier		    continue;
20136211Srnordier		}
20236211Srnordier        	if (!(seensfx & 2) &&
20336211Srnordier			(*buf_ptr == 'L' || *buf_ptr == 'l')) {
20436211Srnordier		    CHECK_SIZE_TOKEN;
20536211Srnordier		    if (buf_ptr[1] == buf_ptr[0])
20636211Srnordier		        *e_token++ = *buf_ptr++;
20736211Srnordier		    *e_token++ = *buf_ptr++;
20836211Srnordier		    seensfx |= 2;
20936211Srnordier		    continue;
21036211Srnordier		}
21136211Srnordier		break;
21236211Srnordier	    }
2131590Srgrimes	}
2141590Srgrimes	else
21585632Sschweikh	    while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
21685182Sschweikh		/* fill_buffer() terminates buffer with newline */
21785182Sschweikh		if (*buf_ptr == BACKSLASH) {
21885182Sschweikh		    if (*(buf_ptr + 1) == '\n') {
21985182Sschweikh			buf_ptr += 2;
22085182Sschweikh			if (buf_ptr >= buf_end)
22185182Sschweikh			    fill_buffer();
22285182Sschweikh			} else
22385182Sschweikh			    break;
22485182Sschweikh		}
2251590Srgrimes		CHECK_SIZE_TOKEN;
22685182Sschweikh		/* copy it over */
2271590Srgrimes		*e_token++ = *buf_ptr++;
2281590Srgrimes		if (buf_ptr >= buf_end)
2291590Srgrimes		    fill_buffer();
2301590Srgrimes	    }
2311590Srgrimes	*e_token++ = '\0';
2321590Srgrimes	while (*buf_ptr == ' ' || *buf_ptr == '\t') {	/* get rid of blanks */
2331590Srgrimes	    if (++buf_ptr >= buf_end)
2341590Srgrimes		fill_buffer();
2351590Srgrimes	}
2361590Srgrimes	ps.its_a_keyword = false;
2371590Srgrimes	ps.sizeof_keyword = false;
2381590Srgrimes	if (l_struct) {		/* if last token was 'struct', then this token
2391590Srgrimes				 * should be treated as a declaration */
2401590Srgrimes	    l_struct = false;
2411590Srgrimes	    last_code = ident;
2421590Srgrimes	    ps.last_u_d = true;
2431590Srgrimes	    return (decl);
2441590Srgrimes	}
2451590Srgrimes	ps.last_u_d = false;	/* Operator after indentifier is binary */
2461590Srgrimes	last_code = ident;	/* Remember that this is the code we will
2471590Srgrimes				 * return */
2481590Srgrimes
2491590Srgrimes	/*
2501590Srgrimes	 * This loop will check if the token is a keyword.
2511590Srgrimes	 */
2521590Srgrimes	for (p = specials; (j = p->rwd) != 0; p++) {
25393440Sdwmalone	    const char *q = s_token;	/* point at scanned token */
25493440Sdwmalone	    if (*j++ != *q++ || *j++ != *q++)
2551590Srgrimes		continue;	/* This test depends on the fact that
2561590Srgrimes				 * identifiers are always at least 1 character
2571590Srgrimes				 * long (ie. the first two bytes of the
2581590Srgrimes				 * identifier are always meaningful) */
25993440Sdwmalone	    if (q[-1] == 0)
2601590Srgrimes		break;		/* If its a one-character identifier */
26193440Sdwmalone	    while (*q++ == *j)
2621590Srgrimes		if (*j++ == 0)
2631590Srgrimes		    goto found_keyword;	/* I wish that C had a multi-level
2641590Srgrimes					 * break... */
2651590Srgrimes	}
2661590Srgrimes	if (p->rwd) {		/* we have a keyword */
2671590Srgrimes    found_keyword:
2681590Srgrimes	    ps.its_a_keyword = true;
2691590Srgrimes	    ps.last_u_d = true;
2701590Srgrimes	    switch (p->rwcode) {
2711590Srgrimes	    case 1:		/* it is a switch */
2721590Srgrimes		return (swstmt);
2731590Srgrimes	    case 2:		/* a case or default */
2741590Srgrimes		return (casestmt);
2751590Srgrimes
2761590Srgrimes	    case 3:		/* a "struct" */
27775727Sobrien		if (ps.p_l_follow)
27875727Sobrien			break;	/* inside parens: cast */
27969795Sobrien		/*
28069795Sobrien		 * Next time around, we may want to know that we have had a
28169795Sobrien		 * 'struct'
28269795Sobrien		 */
2831590Srgrimes		l_struct = true;
2841590Srgrimes
2851590Srgrimes		/*
28669795Sobrien		 * Fall through to test for a cast, function prototype or
28769795Sobrien		 * sizeof().
2881590Srgrimes		 */
2891590Srgrimes	    case 4:		/* one of the declaration keywords */
2901590Srgrimes		if (ps.p_l_follow) {
2911590Srgrimes		    ps.cast_mask |= 1 << ps.p_l_follow;
29269795Sobrien
29369795Sobrien		    /*
29469795Sobrien		     * Forget that we saw `struct' if we're in a sizeof().
29569795Sobrien		     */
29669795Sobrien		    if (ps.sizeof_mask)
29769795Sobrien			l_struct = false;
29869795Sobrien
29969795Sobrien		    break;	/* inside parens: cast, prototype or sizeof() */
3001590Srgrimes		}
3011590Srgrimes		last_code = decl;
3021590Srgrimes		return (decl);
3031590Srgrimes
3041590Srgrimes	    case 5:		/* if, while, for */
3051590Srgrimes		return (sp_paren);
3061590Srgrimes
3071590Srgrimes	    case 6:		/* do, else */
3081590Srgrimes		return (sp_nparen);
3091590Srgrimes
3101590Srgrimes	    case 7:
3111590Srgrimes		ps.sizeof_keyword = true;
3121590Srgrimes	    default:		/* all others are treated like any other
3131590Srgrimes				 * identifier */
3141590Srgrimes		return (ident);
3151590Srgrimes	    }			/* end of switch */
3161590Srgrimes	}			/* end of if (found_it) */
3171590Srgrimes	if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
31898771Sjmallett	    char *tp = buf_ptr;
3191590Srgrimes	    while (tp < buf_end)
3201590Srgrimes		if (*tp++ == ')' && (*tp == ';' || *tp == ','))
3211590Srgrimes		    goto not_proc;
3221590Srgrimes	    strncpy(ps.procname, token, sizeof ps.procname - 1);
3231590Srgrimes	    ps.in_parameter_declaration = 1;
3241590Srgrimes	    rparen_count = 1;
3251590Srgrimes    not_proc:;
3261590Srgrimes	}
3271590Srgrimes	/*
3281590Srgrimes	 * The following hack attempts to guess whether or not the current
3291590Srgrimes	 * token is in fact a declaration keyword -- one that has been
3301590Srgrimes	 * typedefd
3311590Srgrimes	 */
3321590Srgrimes	if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
3331590Srgrimes		&& !ps.p_l_follow
3341590Srgrimes	        && !ps.block_init
3351590Srgrimes		&& (ps.last_token == rparen || ps.last_token == semicolon ||
3361590Srgrimes		    ps.last_token == decl ||
3371590Srgrimes		    ps.last_token == lbrace || ps.last_token == rbrace)) {
3381590Srgrimes	    ps.its_a_keyword = true;
3391590Srgrimes	    ps.last_u_d = true;
3401590Srgrimes	    last_code = decl;
3411590Srgrimes	    return decl;
3421590Srgrimes	}
3431590Srgrimes	if (last_code == decl)	/* if this is a declared variable, then
3441590Srgrimes				 * following sign is unary */
3451590Srgrimes	    ps.last_u_d = true;	/* will make "int a -1" work */
3461590Srgrimes	last_code = ident;
3471590Srgrimes	return (ident);		/* the ident is not in the list */
3481590Srgrimes    }				/* end of procesing for alpanum character */
3491590Srgrimes
3501590Srgrimes    /* Scan a non-alphanumeric token */
3511590Srgrimes
3521590Srgrimes    *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
3531590Srgrimes				 * moved here */
3541590Srgrimes    *e_token = '\0';
3551590Srgrimes    if (++buf_ptr >= buf_end)
3561590Srgrimes	fill_buffer();
3571590Srgrimes
3581590Srgrimes    switch (*token) {
3591590Srgrimes    case '\n':
3601590Srgrimes	unary_delim = ps.last_u_d;
3611590Srgrimes	ps.last_nl = true;	/* remember that we just had a newline */
3621590Srgrimes	code = (had_eof ? 0 : newline);
3631590Srgrimes
3641590Srgrimes	/*
3651590Srgrimes	 * if data has been exausted, the newline is a dummy, and we should
3661590Srgrimes	 * return code to stop
3671590Srgrimes	 */
3681590Srgrimes	break;
3691590Srgrimes
3701590Srgrimes    case '\'':			/* start of quoted character */
3711590Srgrimes    case '"':			/* start of string */
3721590Srgrimes	qchar = *token;
3731590Srgrimes	if (troff) {
3741590Srgrimes	    e_token[-1] = '`';
3751590Srgrimes	    if (qchar == '"')
3761590Srgrimes		*e_token++ = '`';
3771590Srgrimes	    e_token = chfont(&bodyf, &stringf, e_token);
3781590Srgrimes	}
3791590Srgrimes	do {			/* copy the string */
3801590Srgrimes	    while (1) {		/* move one character or [/<char>]<char> */
3811590Srgrimes		if (*buf_ptr == '\n') {
3821590Srgrimes		    printf("%d: Unterminated literal\n", line_no);
3831590Srgrimes		    goto stop_lit;
3841590Srgrimes		}
3851590Srgrimes		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
3861590Srgrimes					 * since CHECK_SIZE guarantees that there
3871590Srgrimes					 * are at least 5 entries left */
3881590Srgrimes		*e_token = *buf_ptr++;
3891590Srgrimes		if (buf_ptr >= buf_end)
3901590Srgrimes		    fill_buffer();
3911590Srgrimes		if (*e_token == BACKSLASH) {	/* if escape, copy extra char */
3921590Srgrimes		    if (*buf_ptr == '\n')	/* check for escaped newline */
3931590Srgrimes			++line_no;
3941590Srgrimes		    if (troff) {
3951590Srgrimes			*++e_token = BACKSLASH;
3961590Srgrimes			if (*buf_ptr == BACKSLASH)
3971590Srgrimes			    *++e_token = BACKSLASH;
3981590Srgrimes		    }
3991590Srgrimes		    *++e_token = *buf_ptr++;
4001590Srgrimes		    ++e_token;	/* we must increment this again because we
4011590Srgrimes				 * copied two chars */
4021590Srgrimes		    if (buf_ptr >= buf_end)
4031590Srgrimes			fill_buffer();
4041590Srgrimes		}
4051590Srgrimes		else
4061590Srgrimes		    break;	/* we copied one character */
4071590Srgrimes	    }			/* end of while (1) */
4081590Srgrimes	} while (*e_token++ != qchar);
4091590Srgrimes	if (troff) {
4101590Srgrimes	    e_token = chfont(&stringf, &bodyf, e_token - 1);
4111590Srgrimes	    if (qchar == '"')
4121590Srgrimes		*e_token++ = '\'';
4131590Srgrimes	}
4141590Srgrimesstop_lit:
4151590Srgrimes	code = ident;
4161590Srgrimes	break;
4171590Srgrimes
4181590Srgrimes    case ('('):
4191590Srgrimes    case ('['):
4201590Srgrimes	unary_delim = true;
4211590Srgrimes	code = lparen;
4221590Srgrimes	break;
4231590Srgrimes
4241590Srgrimes    case (')'):
4251590Srgrimes    case (']'):
4261590Srgrimes	code = rparen;
4271590Srgrimes	break;
4281590Srgrimes
4291590Srgrimes    case '#':
4301590Srgrimes	unary_delim = ps.last_u_d;
4311590Srgrimes	code = preesc;
4321590Srgrimes	break;
4331590Srgrimes
4341590Srgrimes    case '?':
4351590Srgrimes	unary_delim = true;
4361590Srgrimes	code = question;
4371590Srgrimes	break;
4381590Srgrimes
4391590Srgrimes    case (':'):
4401590Srgrimes	code = colon;
4411590Srgrimes	unary_delim = true;
4421590Srgrimes	break;
4431590Srgrimes
4441590Srgrimes    case (';'):
4451590Srgrimes	unary_delim = true;
4461590Srgrimes	code = semicolon;
4471590Srgrimes	break;
4481590Srgrimes
4491590Srgrimes    case ('{'):
4501590Srgrimes	unary_delim = true;
4511590Srgrimes
4521590Srgrimes	/*
4531590Srgrimes	 * if (ps.in_or_st) ps.block_init = 1;
4541590Srgrimes	 */
4551590Srgrimes	/* ?	code = ps.block_init ? lparen : lbrace; */
4561590Srgrimes	code = lbrace;
4571590Srgrimes	break;
4581590Srgrimes
4591590Srgrimes    case ('}'):
4601590Srgrimes	unary_delim = true;
4611590Srgrimes	/* ?	code = ps.block_init ? rparen : rbrace; */
4621590Srgrimes	code = rbrace;
4631590Srgrimes	break;
4641590Srgrimes
4651590Srgrimes    case 014:			/* a form feed */
4661590Srgrimes	unary_delim = ps.last_u_d;
4671590Srgrimes	ps.last_nl = true;	/* remember this so we can set 'ps.col_1'
4681590Srgrimes				 * right */
4691590Srgrimes	code = form_feed;
4701590Srgrimes	break;
4711590Srgrimes
4721590Srgrimes    case (','):
4731590Srgrimes	unary_delim = true;
4741590Srgrimes	code = comma;
4751590Srgrimes	break;
4761590Srgrimes
4771590Srgrimes    case '.':
4781590Srgrimes	unary_delim = false;
4791590Srgrimes	code = period;
4801590Srgrimes	break;
4811590Srgrimes
4821590Srgrimes    case '-':
4831590Srgrimes    case '+':			/* check for -, +, --, ++ */
4841590Srgrimes	code = (ps.last_u_d ? unary_op : binary_op);
4851590Srgrimes	unary_delim = true;
4861590Srgrimes
4871590Srgrimes	if (*buf_ptr == token[0]) {
4881590Srgrimes	    /* check for doubled character */
4891590Srgrimes	    *e_token++ = *buf_ptr++;
4901590Srgrimes	    /* buffer overflow will be checked at end of loop */
4911590Srgrimes	    if (last_code == ident || last_code == rparen) {
4921590Srgrimes		code = (ps.last_u_d ? unary_op : postop);
4931590Srgrimes		/* check for following ++ or -- */
4941590Srgrimes		unary_delim = false;
4951590Srgrimes	    }
4961590Srgrimes	}
4971590Srgrimes	else if (*buf_ptr == '=')
4981590Srgrimes	    /* check for operator += */
4991590Srgrimes	    *e_token++ = *buf_ptr++;
5001590Srgrimes	else if (*buf_ptr == '>') {
5011590Srgrimes	    /* check for operator -> */
5021590Srgrimes	    *e_token++ = *buf_ptr++;
5031590Srgrimes	    if (!pointer_as_binop) {
5041590Srgrimes		unary_delim = false;
5051590Srgrimes		code = unary_op;
5061590Srgrimes		ps.want_blank = false;
5071590Srgrimes	    }
5081590Srgrimes	}
5091590Srgrimes	break;			/* buffer overflow will be checked at end of
5101590Srgrimes				 * switch */
5111590Srgrimes
5121590Srgrimes    case '=':
5131590Srgrimes	if (ps.in_or_st)
5141590Srgrimes	    ps.block_init = 1;
5151590Srgrimes#ifdef undef
5161590Srgrimes	if (chartype[*buf_ptr] == opchar) {	/* we have two char assignment */
5171590Srgrimes	    e_token[-1] = *buf_ptr++;
5181590Srgrimes	    if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
5191590Srgrimes		*e_token++ = *buf_ptr++;
5201590Srgrimes	    *e_token++ = '=';	/* Flip =+ to += */
5211590Srgrimes	    *e_token = 0;
5221590Srgrimes	}
5231590Srgrimes#else
5241590Srgrimes	if (*buf_ptr == '=') {/* == */
5251590Srgrimes	    *e_token++ = '=';	/* Flip =+ to += */
5261590Srgrimes	    buf_ptr++;
5271590Srgrimes	    *e_token = 0;
5281590Srgrimes	}
5291590Srgrimes#endif
5301590Srgrimes	code = binary_op;
5311590Srgrimes	unary_delim = true;
5321590Srgrimes	break;
5331590Srgrimes	/* can drop thru!!! */
5341590Srgrimes
5351590Srgrimes    case '>':
5361590Srgrimes    case '<':
5371590Srgrimes    case '!':			/* ops like <, <<, <=, !=, etc */
5381590Srgrimes	if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
5391590Srgrimes	    *e_token++ = *buf_ptr;
5401590Srgrimes	    if (++buf_ptr >= buf_end)
5411590Srgrimes		fill_buffer();
5421590Srgrimes	}
5431590Srgrimes	if (*buf_ptr == '=')
5441590Srgrimes	    *e_token++ = *buf_ptr++;
5451590Srgrimes	code = (ps.last_u_d ? unary_op : binary_op);
5461590Srgrimes	unary_delim = true;
5471590Srgrimes	break;
5481590Srgrimes
5491590Srgrimes    default:
5501590Srgrimes	if (token[0] == '/' && *buf_ptr == '*') {
5511590Srgrimes	    /* it is start of comment */
5521590Srgrimes	    *e_token++ = '*';
5531590Srgrimes
5541590Srgrimes	    if (++buf_ptr >= buf_end)
5551590Srgrimes		fill_buffer();
5561590Srgrimes
5571590Srgrimes	    code = comment;
5581590Srgrimes	    unary_delim = ps.last_u_d;
5591590Srgrimes	    break;
5601590Srgrimes	}
5611590Srgrimes	while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
5621590Srgrimes	    /*
5631590Srgrimes	     * handle ||, &&, etc, and also things as in int *****i
5641590Srgrimes	     */
5651590Srgrimes	    *e_token++ = *buf_ptr;
5661590Srgrimes	    if (++buf_ptr >= buf_end)
5671590Srgrimes		fill_buffer();
5681590Srgrimes	}
5691590Srgrimes	code = (ps.last_u_d ? unary_op : binary_op);
5701590Srgrimes	unary_delim = true;
5711590Srgrimes
5721590Srgrimes
5731590Srgrimes    }				/* end of switch */
5741590Srgrimes    if (code != newline) {
5751590Srgrimes	l_struct = false;
5761590Srgrimes	last_code = code;
5771590Srgrimes    }
5781590Srgrimes    if (buf_ptr >= buf_end)	/* check for input buffer empty */
5791590Srgrimes	fill_buffer();
5801590Srgrimes    ps.last_u_d = unary_delim;
5811590Srgrimes    *e_token = '\0';		/* null terminate the token */
5821590Srgrimes    return (code);
5831590Srgrimes}
5841590Srgrimes
5851590Srgrimes/*
5861590Srgrimes * Add the given keyword to the keyword table, using val as the keyword type
5871590Srgrimes */
58885632Sschweikhvoid
58985632Sschweikhaddkey(char *key, int val)
5901590Srgrimes{
59198771Sjmallett    struct templ *p = specials;
5921590Srgrimes    while (p->rwd)
5931590Srgrimes	if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
5941590Srgrimes	    return;
5951590Srgrimes	else
5961590Srgrimes	    p++;
5971590Srgrimes    if (p >= specials + sizeof specials / sizeof specials[0])
5981590Srgrimes	return;			/* For now, table overflows are silently
5991590Srgrimes				 * ignored */
6001590Srgrimes    p->rwd = key;
6011590Srgrimes    p->rwcode = val;
6021590Srgrimes    p[1].rwd = 0;
6031590Srgrimes    p[1].rwcode = 0;
6041590Srgrimes}
605