scan.l revision 286614
%{
/* $NetBSD: scan.l,v 1.37 2007/02/06 00:08:31 he Exp $ */

/*
 * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
 * Copyright (c) 1994, 1995 Jochen Pohl
 * All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Jochen Pohl for
 *      The NetBSD Project.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#if defined(__RCSID) && !defined(lint)
__RCSID("$NetBSD: scan.l,v 1.37 2007/02/06 00:08:31 he Exp $");
#endif
__FBSDID("$FreeBSD: head/usr.bin/xlint/lint1/scan.l 286614 2015-08-11 02:58:33Z pfg $");

#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <float.h>
#include <ctype.h>
#include <errno.h>
#include <err.h>
#include <math.h>

#include "lint1.h"
#include "cgram.h"

/*
 * Mask selecting the low CHAR_BIT bits of an int.  Built from a positive
 * value because left-shifting a negative int (the former ~0 << CHAR_BIT)
 * is undefined behaviour.
 */
#define CHAR_MASK	((1 << CHAR_BIT) - 1)

/* Current position (it's also updated when an included file is parsed) */
pos_t	curr_pos = { 1, "", 0 };

/*
 * Current position in C source (not updated when an included file is
 * parsed).
6259243Sobrien */ 6359243Sobrienpos_t csrc_pos = { 1, "", 0 }; 6459243Sobrien 6559243Sobrienstatic void incline(void); 6659243Sobrienstatic void badchar(int); 6759243Sobrienstatic sbuf_t *allocsb(void); 6859243Sobrienstatic void freesb(sbuf_t *); 6959243Sobrienstatic int inpc(void); 7059243Sobrienstatic int hash(const char *); 7159243Sobrienstatic sym_t *search(sbuf_t *); 7259243Sobrienstatic int name(void); 7359243Sobrienstatic int keyw(sym_t *); 7459243Sobrienstatic int icon(int); 7559243Sobrienstatic int fcon(void); 7659243Sobrienstatic int operator(int, op_t); 7759243Sobrienstatic int ccon(void); 7859243Sobrienstatic int wccon(void); 7959243Sobrienstatic int getescc(int); 8059243Sobrienstatic void directive(void); 8159243Sobrienstatic void comment(void); 8259243Sobrienstatic void slashslashcomment(void); 8359243Sobrienstatic int string(void); 8459243Sobrienstatic int wcstrg(void); 85131962Smp 8659243Sobrien%} 8759243Sobrien 8859243Sobrien%option nounput 8959243Sobrien 9059243SobrienL [_A-Za-z] 9159243SobrienD [0-9] 9259243SobrienNZD [1-9] 9359243SobrienOD [0-7] 9459243SobrienHD [0-9A-Fa-f] 9559243SobrienEX ([eE][+-]?[0-9]+) 9659243Sobrien 9759243Sobrien%% 9859243Sobrien 9959243Sobrien{L}({L}|{D})* return (name()); 10059243Sobrien0{OD}*[lLuU]* return (icon(8)); 10159243Sobrien{NZD}{D}*[lLuU]* return (icon(10)); 10259243Sobrien0[xX]{HD}+[lLuU]* return (icon(16)); 10359243Sobrien{D}+\.{D}*{EX}?[fFlL]? | 10459243Sobrien{D}+{EX}[fFlL]? | 10559243Sobrien0[xX]{HD}+p{HD}+[fFlL]? | 10659243Sobrien\.{D}+{EX}?[fFlL]? 
return (fcon()); 10759243Sobrien"=" return (operator(T_ASSIGN, ASSIGN)); 10859243Sobrien"*=" return (operator(T_OPASS, MULASS)); 10959243Sobrien"/=" return (operator(T_OPASS, DIVASS)); 11059243Sobrien"%=" return (operator(T_OPASS, MODASS)); 11159243Sobrien"+=" return (operator(T_OPASS, ADDASS)); 11259243Sobrien"-=" return (operator(T_OPASS, SUBASS)); 11359243Sobrien"<<=" return (operator(T_OPASS, SHLASS)); 11459243Sobrien">>=" return (operator(T_OPASS, SHRASS)); 11559243Sobrien"&=" return (operator(T_OPASS, ANDASS)); 11659243Sobrien"^=" return (operator(T_OPASS, XORASS)); 11759243Sobrien"|=" return (operator(T_OPASS, ORASS)); 11859243Sobrien"||" return (operator(T_LOGOR, LOGOR)); 11959243Sobrien"&&" return (operator(T_LOGAND, LOGAND)); 12059243Sobrien"|" return (operator(T_OR, OR)); 12159243Sobrien"&" return (operator(T_AND, AND)); 12259243Sobrien"^" return (operator(T_XOR, XOR)); 12359243Sobrien"==" return (operator(T_EQOP, EQ)); 12459243Sobrien"!=" return (operator(T_EQOP, NE)); 12559243Sobrien"<" return (operator(T_RELOP, LT)); 12659243Sobrien">" return (operator(T_RELOP, GT)); 12759243Sobrien"<=" return (operator(T_RELOP, LE)); 12859243Sobrien">=" return (operator(T_RELOP, GE)); 12959243Sobrien"<<" return (operator(T_SHFTOP, SHL)); 13059243Sobrien">>" return (operator(T_SHFTOP, SHR)); 13159243Sobrien"++" return (operator(T_INCDEC, INC)); 13259243Sobrien"--" return (operator(T_INCDEC, DEC)); 13359243Sobrien"->" return (operator(T_STROP, ARROW)); 13459243Sobrien"." return (operator(T_STROP, POINT)); 13559243Sobrien"+" return (operator(T_ADDOP, PLUS)); 13659243Sobrien"-" return (operator(T_ADDOP, MINUS)); 13759243Sobrien"*" return (operator(T_MULT, MULT)); 13859243Sobrien"/" return (operator(T_DIVOP, DIV)); 13959243Sobrien"%" return (operator(T_DIVOP, MOD)); 14059243Sobrien"!" 
return (operator(T_UNOP, NOT)); 14159243Sobrien"~" return (operator(T_UNOP, COMPL)); 14259243Sobrien"\"" return (string()); 14359243Sobrien"L\"" return (wcstrg()); 14459243Sobrien";" return (T_SEMI); 14559243Sobrien"{" return (T_LBRACE); 14659243Sobrien"}" return (T_RBRACE); 14759243Sobrien"," return (T_COMMA); 14859243Sobrien":" return (T_COLON); 14959243Sobrien"?" return (T_QUEST); 15059243Sobrien"[" return (T_LBRACK); 15159243Sobrien"]" return (T_RBRACK); 15259243Sobrien"(" return (T_LPARN); 15359243Sobrien")" return (T_RPARN); 15459243Sobrien"..." return (T_ELLIPSE); 15559243Sobrien"'" return (ccon()); 15659243Sobrien"L'" return (wccon()); 15759243Sobrien^#.*$ directive(); 15859243Sobrien\n incline(); 15959243Sobrien\t|" "|\f|\v ; 16059243Sobrien"/*" comment(); 16159243Sobrien"//" slashslashcomment(); 16259243Sobrien. badchar(yytext[0]); 16359243Sobrien 16459243Sobrien%% 16559243Sobrien 16659243Sobrienstatic void 16759243Sobrienincline(void) 16859243Sobrien{ 16959243Sobrien curr_pos.p_line++; 17059243Sobrien curr_pos.p_uniq = 0; 17159243Sobrien if (curr_pos.p_file == csrc_pos.p_file) { 17259243Sobrien csrc_pos.p_line++; 17359243Sobrien csrc_pos.p_uniq = 0; 17459243Sobrien } 17559243Sobrien} 17659243Sobrien 17759243Sobrienstatic void 17859243Sobrienbadchar(int c) 17959243Sobrien{ 18059243Sobrien 18159243Sobrien /* unknown character \%o */ 18259243Sobrien error(250, c); 18359243Sobrien} 18459243Sobrien 18559243Sobrien/* 18659243Sobrien * Keywords. 18759243Sobrien * During initialisation they are written to the symbol table. 18859243Sobrien */ 18959243Sobrienstatic struct kwtab { 19059243Sobrien const char *kw_name; /* keyword */ 19159243Sobrien int kw_token; /* token returned by yylex() */ 19259243Sobrien scl_t kw_scl; /* storage class if kw_token T_SCLASS */ 19359243Sobrien tspec_t kw_tspec; /* type spec. if kw_token T_TYPE or T_SOU */ 19459243Sobrien tqual_t kw_tqual; /* type qual. 
fi kw_token T_QUAL */ 19559243Sobrien u_int kw_c89; /* c89 keyword */ 19659243Sobrien u_int kw_c99; /* c99 keyword */ 19759243Sobrien u_int kw_gcc; /* GCC keyword */ 19859243Sobrien} kwtab[] = { 19959243Sobrien { "asm", T_ASM, 0, 0, 0, 0, 0, 1 }, 20059243Sobrien { "__asm", T_ASM, 0, 0, 0, 0, 0, 0 }, 20159243Sobrien { "__asm__", T_ASM, 0, 0, 0, 0, 0, 0 }, 20259243Sobrien { "auto", T_SCLASS, AUTO, 0, 0, 0, 0, 0 }, 20359243Sobrien { "break", T_BREAK, 0, 0, 0, 0, 0, 0 }, 20459243Sobrien { "case", T_CASE, 0, 0, 0, 0, 0, 0 }, 20559243Sobrien { "char", T_TYPE, 0, CHAR, 0, 0, 0, 0 }, 20659243Sobrien { "const", T_QUAL, 0, 0, CONST, 1, 0, 0 }, 20759243Sobrien { "__const__", T_QUAL, 0, 0, CONST, 0, 0, 0 }, 20859243Sobrien { "__const", T_QUAL, 0, 0, CONST, 0, 0, 0 }, 20959243Sobrien { "continue", T_CONTINUE, 0, 0, 0, 0, 0, 0 }, 21059243Sobrien { "default", T_DEFAULT, 0, 0, 0, 0, 0, 0 }, 21159243Sobrien { "do", T_DO, 0, 0, 0, 0, 0, 0 }, 21259243Sobrien { "double", T_TYPE, 0, DOUBLE, 0, 0, 0, 0 }, 21359243Sobrien { "else", T_ELSE, 0, 0, 0, 0, 0, 0 }, 21459243Sobrien { "enum", T_ENUM, 0, 0, 0, 0, 0, 0 }, 21559243Sobrien { "extern", T_SCLASS, EXTERN, 0, 0, 0, 0, 0 }, 21659243Sobrien { "float", T_TYPE, 0, FLOAT, 0, 0, 0, 0 }, 21759243Sobrien { "for", T_FOR, 0, 0, 0, 0, 0, 0 }, 21859243Sobrien { "goto", T_GOTO, 0, 0, 0, 0, 0, 0 }, 21959243Sobrien { "if", T_IF, 0, 0, 0, 0, 0, 0 }, 22059243Sobrien { "inline", T_SCLASS, INLINE, 0, 0, 0, 1, 0 }, 22159243Sobrien { "__inline__", T_SCLASS, INLINE, 0, 0, 0, 0, 0 }, 22259243Sobrien { "__inline", T_SCLASS, INLINE, 0, 0, 0, 0, 0 }, 22359243Sobrien { "int", T_TYPE, 0, INT, 0, 0, 0, 0 }, 22459243Sobrien { "__symbolrename", T_SYMBOLRENAME, 0, 0, 0, 0, 0, 0 }, 22559243Sobrien { "long", T_TYPE, 0, LONG, 0, 0, 0, 0 }, 22659243Sobrien { "register", T_SCLASS, REG, 0, 0, 0, 0, 0 }, 22759243Sobrien { "return", T_RETURN, 0, 0, 0, 0, 0, 0 }, 22859243Sobrien { "short", T_TYPE, 0, SHORT, 0, 0, 0, 0 }, 22959243Sobrien { "signed", T_TYPE, 0, SIGNED, 0, 1, 0, 
0 }, 23059243Sobrien { "__signed__", T_TYPE, 0, SIGNED, 0, 0, 0, 0 }, 23159243Sobrien { "__signed", T_TYPE, 0, SIGNED, 0, 0, 0, 0 }, 23259243Sobrien { "sizeof", T_SIZEOF, 0, 0, 0, 0, 0, 0 }, 23359243Sobrien { "static", T_SCLASS, STATIC, 0, 0, 0, 0, 0 }, 23459243Sobrien { "struct", T_SOU, 0, STRUCT, 0, 0, 0, 0 }, 23559243Sobrien { "switch", T_SWITCH, 0, 0, 0, 0, 0, 0 }, 23659243Sobrien { "typedef", T_SCLASS, TYPEDEF, 0, 0, 0, 0, 0 }, 23759243Sobrien { "union", T_SOU, 0, UNION, 0, 0, 0, 0 }, 23859243Sobrien { "unsigned", T_TYPE, 0, UNSIGN, 0, 0, 0, 0 }, 23959243Sobrien { "void", T_TYPE, 0, VOID, 0, 0, 0, 0 }, 24059243Sobrien { "volatile", T_QUAL, 0, 0, VOLATILE, 1, 0, 0 }, 24159243Sobrien { "__volatile__", T_QUAL, 0, 0, VOLATILE, 0, 0, 0 }, 24259243Sobrien { "__volatile", T_QUAL, 0, 0, VOLATILE, 0, 0, 0 }, 24359243Sobrien { "while", T_WHILE, 0, 0, 0, 0, 0, 0 }, 24459243Sobrien { NULL, 0, 0, 0, 0, 0, 0, 0 } 24559243Sobrien}; 24659243Sobrien 24759243Sobrien/* Symbol table */ 24859243Sobrienstatic sym_t *symtab[HSHSIZ1]; 24959243Sobrien 25059243Sobrien/* bit i of the entry with index i is set */ 25159243Sobrienuint64_t qbmasks[sizeof(uint64_t) * CHAR_BIT]; 25259243Sobrien 25359243Sobrien/* least significant i bits are set in the entry with index i */ 25459243Sobrienuint64_t qlmasks[sizeof(uint64_t) * CHAR_BIT + 1]; 25559243Sobrien 25659243Sobrien/* least significant i bits are not set in the entry with index i */ 25759243Sobrienuint64_t qumasks[sizeof(uint64_t) * CHAR_BIT + 1]; 25859243Sobrien 25959243Sobrien/* free list for sbuf structures */ 26059243Sobrienstatic sbuf_t *sbfrlst; 26159243Sobrien 26259243Sobrien/* type of next expected symbol */ 26359243Sobriensymt_t symtyp; 26459243Sobrien 26559243Sobrien 26659243Sobrien/* 26759243Sobrien * All keywords are written to the symbol table. This saves us looking 26859243Sobrien * in an extra table for each name we found. 
26959243Sobrien */ 27059243Sobrienvoid 27159243Sobrieninitscan(void) 27259243Sobrien{ 27359243Sobrien struct kwtab *kw; 27459243Sobrien sym_t *sym; 27559243Sobrien int h, i; 27659243Sobrien uint64_t uq; 27759243Sobrien 27859243Sobrien for (kw = kwtab; kw->kw_name != NULL; kw++) { 27959243Sobrien if ((kw->kw_c89 || kw->kw_c99) && tflag) 28059243Sobrien continue; 28159243Sobrien if (kw->kw_c99 && !(Sflag || gflag)) 28259243Sobrien continue; 28359243Sobrien if (kw->kw_gcc && !gflag) 28459243Sobrien continue; 28559243Sobrien sym = getblk(sizeof (sym_t)); 28659243Sobrien sym->s_name = kw->kw_name; 28759243Sobrien sym->s_keyw = 1; 28859243Sobrien sym->s_value.v_quad = kw->kw_token; 28959243Sobrien if (kw->kw_token == T_TYPE || kw->kw_token == T_SOU) { 29059243Sobrien sym->s_tspec = kw->kw_tspec; 29159243Sobrien } else if (kw->kw_token == T_SCLASS) { 29259243Sobrien sym->s_scl = kw->kw_scl; 29359243Sobrien } else if (kw->kw_token == T_QUAL) { 29459243Sobrien sym->s_tqual = kw->kw_tqual; 29559243Sobrien } 29659243Sobrien h = hash(sym->s_name); 29759243Sobrien if ((sym->s_link = symtab[h]) != NULL) 29859243Sobrien symtab[h]->s_rlink = &sym->s_link; 29959243Sobrien (symtab[h] = sym)->s_rlink = &symtab[h]; 30059243Sobrien } 30159243Sobrien 30259243Sobrien /* initialize bit-masks for quads */ 30359243Sobrien for (i = 0; i < sizeof (uint64_t) * CHAR_BIT; i++) { 30459243Sobrien qbmasks[i] = (uint64_t)1 << i; 30559243Sobrien uq = ~(uint64_t)0 << i; 30659243Sobrien qumasks[i] = uq; 30759243Sobrien qlmasks[i] = ~uq; 30859243Sobrien } 30959243Sobrien qumasks[i] = 0; 31059243Sobrien qlmasks[i] = ~(uint64_t)0; 31159243Sobrien} 31259243Sobrien 31359243Sobrien/* 31459243Sobrien * Get a free sbuf structure, if possible from the free list 31559243Sobrien */ 31659243Sobrienstatic sbuf_t * 31759243Sobrienallocsb(void) 31859243Sobrien{ 31959243Sobrien sbuf_t *sb; 32059243Sobrien 32159243Sobrien if ((sb = sbfrlst) != NULL) { 32259243Sobrien sbfrlst = sb->sb_nxt; 32359243Sobrien } else { 
32459243Sobrien if ((sb = malloc(sizeof (sbuf_t))) == NULL) 32559243Sobrien nomem(); 32659243Sobrien } 32759243Sobrien (void)memset(sb, 0, sizeof (*sb)); 32859243Sobrien return (sb); 32959243Sobrien} 33059243Sobrien 33159243Sobrien/* 33259243Sobrien * Put a sbuf structure to the free list 33359243Sobrien */ 33459243Sobrienstatic void 33559243Sobrienfreesb(sbuf_t *sb) 33659243Sobrien{ 33759243Sobrien 33859243Sobrien sb->sb_nxt = sbfrlst; 33959243Sobrien sbfrlst = sb; 34059243Sobrien} 34159243Sobrien 34259243Sobrien/* 34359243Sobrien * Read a character and ensure that it is positive (except EOF). 34459243Sobrien * Increment line count(s) if necessary. 34559243Sobrien */ 34659243Sobrienstatic int 34759243Sobrieninpc(void) 34859243Sobrien{ 34959243Sobrien int c; 35059243Sobrien 35159243Sobrien if ((c = input()) != EOF && (c &= CHAR_MASK) == '\n') 35259243Sobrien incline(); 35359243Sobrien return (c); 35459243Sobrien} 35559243Sobrien 35659243Sobrienstatic int 35759243Sobrienhash(const char *s) 35859243Sobrien{ 35959243Sobrien u_int v; 36059243Sobrien const u_char *us; 36159243Sobrien 36259243Sobrien v = 0; 36359243Sobrien for (us = (const u_char *)s; *us != '\0'; us++) { 36459243Sobrien v = (v << sizeof (v)) + *us; 36559243Sobrien v ^= v >> (sizeof (v) * CHAR_BIT - sizeof (v)); 36659243Sobrien } 36759243Sobrien return (v % HSHSIZ1); 36859243Sobrien} 36959243Sobrien 37059243Sobrien/* 37159243Sobrien * Lex has found a letter followed by zero or more letters or digits. 37259243Sobrien * It looks for a symbol in the symbol table with the same name. This 37359243Sobrien * symbol must either be a keyword or a symbol of the type required by 37459243Sobrien * symtyp (label, member, tag, ...). 37559243Sobrien * 37659243Sobrien * If it is a keyword, the token is returned. In some cases it is described 37759243Sobrien * more deeply by data written to yylval. 
37859243Sobrien * 37959243Sobrien * If it is a symbol, T_NAME is returned and the pointer to a sbuf struct 38059243Sobrien * is stored in yylval. This struct contains the name of the symbol, it's 38159243Sobrien * length and hash value. If there is already a symbol of the same name 38259243Sobrien * and type in the symbol table, the sbuf struct also contains a pointer 38359243Sobrien * to the symbol table entry. 38459243Sobrien */ 38559243Sobrienstatic int 38659243Sobrienname(void) 38759243Sobrien{ 38859243Sobrien char *s; 38959243Sobrien sbuf_t *sb; 39059243Sobrien sym_t *sym; 39159243Sobrien int tok; 39259243Sobrien 39359243Sobrien sb = allocsb(); 39459243Sobrien sb->sb_name = yytext; 39559243Sobrien sb->sb_len = yyleng; 39659243Sobrien sb->sb_hash = hash(yytext); 39759243Sobrien 39859243Sobrien if ((sym = search(sb)) != NULL && sym->s_keyw) { 39959243Sobrien freesb(sb); 40059243Sobrien return (keyw(sym)); 40159243Sobrien } 40259243Sobrien 40359243Sobrien sb->sb_sym = sym; 40459243Sobrien 40559243Sobrien if (sym != NULL) { 40659243Sobrien if (blklev < sym->s_blklev) 40759243Sobrien LERROR("name()"); 40859243Sobrien sb->sb_name = sym->s_name; 40959243Sobrien sb->sb_len = strlen(sym->s_name); 41059243Sobrien tok = sym->s_scl == TYPEDEF ? 
T_TYPENAME : T_NAME; 41159243Sobrien } else { 41259243Sobrien s = getblk(yyleng + 1); 41359243Sobrien (void)memcpy(s, yytext, yyleng + 1); 41459243Sobrien sb->sb_name = s; 41559243Sobrien sb->sb_len = yyleng; 41659243Sobrien tok = T_NAME; 41759243Sobrien } 41859243Sobrien 41959243Sobrien yylval.y_sb = sb; 42059243Sobrien return (tok); 42159243Sobrien} 42259243Sobrien 42359243Sobrienstatic sym_t * 42459243Sobriensearch(sbuf_t *sb) 42559243Sobrien{ 42659243Sobrien sym_t *sym; 42759243Sobrien 42859243Sobrien for (sym = symtab[sb->sb_hash]; sym != NULL; sym = sym->s_link) { 42959243Sobrien if (strcmp(sym->s_name, sb->sb_name) == 0) { 43059243Sobrien if (sym->s_keyw || sym->s_kind == symtyp) 43159243Sobrien return (sym); 43259243Sobrien } 43359243Sobrien } 43459243Sobrien 43559243Sobrien return (NULL); 43659243Sobrien} 43759243Sobrien 43859243Sobrienstatic int 43959243Sobrienkeyw(sym_t *sym) 44059243Sobrien{ 44159243Sobrien int t; 44259243Sobrien 44359243Sobrien if ((t = (int)sym->s_value.v_quad) == T_SCLASS) { 44459243Sobrien yylval.y_scl = sym->s_scl; 445131962Smp } else if (t == T_TYPE || t == T_SOU) { 446131962Smp yylval.y_tspec = sym->s_tspec; 44759243Sobrien } else if (t == T_QUAL) { 44859243Sobrien yylval.y_tqual = sym->s_tqual; 44959243Sobrien } 45059243Sobrien return (t); 45159243Sobrien} 45259243Sobrien 45359243Sobrien/* 45459243Sobrien * Convert a string representing an integer into internal representation. 45559243Sobrien * The value is returned in yylval. icon() (and yylex()) returns T_CON. 
45659243Sobrien */ 45759243Sobrienstatic int 45859243Sobrienicon(int base) 45959243Sobrien{ 46059243Sobrien int l_suffix, u_suffix; 46159243Sobrien int len; 46259243Sobrien const char *cp; 46359243Sobrien char c, *eptr; 46459243Sobrien tspec_t typ; 46559243Sobrien u_long ul = 0; 46659243Sobrien uint64_t uq = 0; 46759243Sobrien int ansiu; 46859243Sobrien static tspec_t contypes[2][3] = { 46959243Sobrien { INT, LONG, QUAD }, 47059243Sobrien { UINT, ULONG, UQUAD } 47159243Sobrien }; 47259243Sobrien 47359243Sobrien cp = yytext; 47459243Sobrien len = yyleng; 47559243Sobrien 47659243Sobrien /* skip 0x */ 47759243Sobrien if (base == 16) { 47859243Sobrien cp += 2; 47959243Sobrien len -= 2; 48059243Sobrien } 48159243Sobrien 48259243Sobrien /* read suffixes */ 48359243Sobrien l_suffix = u_suffix = 0; 48459243Sobrien for ( ; ; ) { 48559243Sobrien if ((c = cp[len - 1]) == 'l' || c == 'L') { 48659243Sobrien l_suffix++; 48759243Sobrien } else if (c == 'u' || c == 'U') { 48859243Sobrien u_suffix++; 48959243Sobrien } else { 49059243Sobrien break; 49159243Sobrien } 49259243Sobrien len--; 49359243Sobrien } 49459243Sobrien if (l_suffix > 2 || u_suffix > 1) { 49559243Sobrien /* malformed integer constant */ 49659243Sobrien warning(251); 49759243Sobrien if (l_suffix > 2) 49859243Sobrien l_suffix = 2; 49959243Sobrien if (u_suffix > 1) 50059243Sobrien u_suffix = 1; 50159243Sobrien } 50259243Sobrien if (tflag && u_suffix != 0) { 50359243Sobrien /* suffix U is illegal in traditional C */ 50459243Sobrien warning(97); 50559243Sobrien } 50659243Sobrien typ = contypes[u_suffix][l_suffix]; 50759243Sobrien 50859243Sobrien errno = 0; 50959243Sobrien if (l_suffix < 2) { 51059243Sobrien ul = strtoul(cp, &eptr, base); 51159243Sobrien } else { 51259243Sobrien uq = strtouq(cp, &eptr, base); 51359243Sobrien } 51459243Sobrien if (eptr != cp + len) 51559243Sobrien LERROR("icon()"); 51659243Sobrien if (errno != 0) 51759243Sobrien /* integer constant out of range */ 51859243Sobrien warning(252); 
51959243Sobrien 52059243Sobrien /* 52159243Sobrien * If the value is too big for the current type, we must choose 52259243Sobrien * another type. 52359243Sobrien */ 52459243Sobrien ansiu = 0; 52559243Sobrien switch (typ) { 52659243Sobrien case INT: 52759243Sobrien if (ul <= INT_MAX) { 52859243Sobrien /* ok */ 52959243Sobrien } else if (ul <= (unsigned)UINT_MAX && base != 10) { 53059243Sobrien typ = UINT; 53159243Sobrien#if INT_MAX != LONG_MAX 53259243Sobrien } else if (ul <= LONG_MAX) { 53359243Sobrien typ = LONG; 53459243Sobrien#endif 53559243Sobrien } else { 53659243Sobrien typ = ULONG; 53759243Sobrien } 53859243Sobrien if (typ == UINT || typ == ULONG) { 53959243Sobrien if (tflag) { 54059243Sobrien typ = LONG; 54159243Sobrien } else if (!sflag) { 54259243Sobrien /* 54359243Sobrien * Remember that the constant is unsigned 54459243Sobrien * only in ANSI C 54559243Sobrien */ 54659243Sobrien ansiu = 1; 54759243Sobrien } 54859243Sobrien } 54959243Sobrien break; 55059243Sobrien case UINT: 55159243Sobrien if (ul > (u_int)UINT_MAX) 55259243Sobrien typ = ULONG; 55359243Sobrien break; 55459243Sobrien case LONG: 55559243Sobrien if (ul > LONG_MAX && !tflag) { 55659243Sobrien typ = ULONG; 55759243Sobrien if (!sflag) 55859243Sobrien ansiu = 1; 55959243Sobrien } 56059243Sobrien break; 56159243Sobrien case QUAD: 56259243Sobrien if (uq > QUAD_MAX && !tflag) { 56359243Sobrien typ = UQUAD; 56459243Sobrien if (!sflag) 56559243Sobrien ansiu = 1; 56659243Sobrien } 56759243Sobrien break; 56859243Sobrien /* LINTED (enumeration values not handled in switch) */ 56959243Sobrien case STRUCT: 57059243Sobrien case VOID: 57159243Sobrien case LDOUBLE: 57259243Sobrien case FUNC: 57359243Sobrien case ARRAY: 57459243Sobrien case PTR: 57559243Sobrien case ENUM: 57659243Sobrien case UNION: 57759243Sobrien case SIGNED: 57859243Sobrien case NOTSPEC: 57959243Sobrien case DOUBLE: 58059243Sobrien case FLOAT: 58159243Sobrien case UQUAD: 58259243Sobrien case ULONG: 58359243Sobrien case USHORT: 
58459243Sobrien case SHORT: 58559243Sobrien case UCHAR: 58659243Sobrien case SCHAR: 58759243Sobrien case CHAR: 58859243Sobrien case UNSIGN: 58959243Sobrien break; 59059243Sobrien } 59159243Sobrien 59259243Sobrien if (typ != QUAD && typ != UQUAD) { 59359243Sobrien if (isutyp(typ)) { 59459243Sobrien uq = ul; 59559243Sobrien } else { 59659243Sobrien uq = (int64_t)(long)ul; 59759243Sobrien } 59859243Sobrien } 59959243Sobrien 60059243Sobrien uq = (uint64_t)xsign((int64_t)uq, typ, -1); 60159243Sobrien 60259243Sobrien if ((yylval.y_val = calloc(1, sizeof(val_t))) == NULL) 60359243Sobrien nomem(); 60459243Sobrien yylval.y_val->v_tspec = typ; 60559243Sobrien yylval.y_val->v_ansiu = ansiu; 60659243Sobrien yylval.y_val->v_quad = (int64_t)uq; 60759243Sobrien 60859243Sobrien return (T_CON); 60959243Sobrien} 61059243Sobrien 61159243Sobrien/* 61259243Sobrien * Returns 1 if t is a signed type and the value is negative. 61359243Sobrien * 61459243Sobrien * len is the number of significant bits. If len is -1, len is set 61559243Sobrien * to the width of type t. 61659243Sobrien */ 61759243Sobrienint 61859243Sobriensign(int64_t q, tspec_t t, int len) 619{ 620 621 if (t == PTR || isutyp(t)) 622 return (0); 623 return (msb(q, t, len)); 624} 625 626int 627msb(int64_t q, tspec_t t, int len) 628{ 629 630 if (len <= 0) 631 len = size(t); 632 return ((q & qbmasks[len - 1]) != 0); 633} 634 635/* 636 * Extends the sign of q. 637 */ 638int64_t 639xsign(int64_t q, tspec_t t, int len) 640{ 641 642 if (len <= 0) 643 len = size(t); 644 645 if (t == PTR || isutyp(t) || !sign(q, t, len)) { 646 q &= qlmasks[len]; 647 } else { 648 q |= qumasks[len]; 649 } 650 return (q); 651} 652 653/* 654 * Convert a string representing a floating point value into its interal 655 * representation. Type and value are returned in yylval. fcon() 656 * (and yylex()) returns T_CON. 657 * XXX Currently it is not possible to convert constants of type 658 * long double which are greater than DBL_MAX. 
659 */ 660static int 661fcon(void) 662{ 663 const char *cp; 664 int len; 665 tspec_t typ; 666 char c, *eptr; 667 double d; 668 float f = 0; 669 670 cp = yytext; 671 len = yyleng; 672 673 if ((c = cp[len - 1]) == 'f' || c == 'F') { 674 typ = FLOAT; 675 len--; 676 } else if (c == 'l' || c == 'L') { 677 typ = LDOUBLE; 678 len--; 679 } else { 680 typ = DOUBLE; 681 } 682 683 if (tflag && typ != DOUBLE) { 684 /* suffixes F and L are illegal in traditional C */ 685 warning(98); 686 } 687 688 errno = 0; 689 d = strtod(cp, &eptr); 690 if (eptr != cp + len) { 691 switch (*eptr) { 692 /* 693 * XXX: non-native non-current strtod() may not handle hex 694 * floats, ignore the rest if we find traces of hex float 695 * syntax... 696 */ 697 case 'p': 698 case 'P': 699 case 'x': 700 case 'X': 701 d = 0; 702 errno = 0; 703 break; 704 default: 705 LERROR("fcon()"); 706 } 707 } 708 if (errno != 0) 709 /* floating-point constant out of range */ 710 warning(248); 711 712 if (typ == FLOAT) { 713 f = (float)d; 714 if (!finite(f)) { 715 /* floating-point constant out of range */ 716 warning(248); 717 f = f > 0 ? FLT_MAX : -FLT_MAX; 718 } 719 } 720 721 if ((yylval.y_val = calloc(1, sizeof (val_t))) == NULL) 722 nomem(); 723 yylval.y_val->v_tspec = typ; 724 if (typ == FLOAT) { 725 yylval.y_val->v_ldbl = f; 726 } else { 727 yylval.y_val->v_ldbl = d; 728 } 729 730 return (T_CON); 731} 732 733static int 734operator(int t, op_t o) 735{ 736 737 yylval.y_op = o; 738 return (t); 739} 740 741/* 742 * Called if lex found a leading \'. 
743 */ 744static int 745ccon(void) 746{ 747 int n, val, c; 748 char cv; 749 750 n = 0; 751 val = 0; 752 while ((c = getescc('\'')) >= 0) { 753 val = (val << CHAR_BIT) + c; 754 n++; 755 } 756 if (c == -2) { 757 /* unterminated character constant */ 758 error(253); 759 } else { 760 if (n > sizeof (int) || (n > 1 && (pflag || hflag))) { 761 /* too many characters in character constant */ 762 error(71); 763 } else if (n > 1) { 764 /* multi-character character constant */ 765 warning(294); 766 } else if (n == 0) { 767 /* empty character constant */ 768 error(73); 769 } 770 } 771 if (n == 1) { 772 cv = (char)val; 773 val = cv; 774 } 775 776 yylval.y_val = xcalloc(1, sizeof (val_t)); 777 yylval.y_val->v_tspec = INT; 778 yylval.y_val->v_quad = val; 779 780 return (T_CON); 781} 782 783/* 784 * Called if lex found a leading L\' 785 */ 786static int 787wccon(void) 788{ 789 static char buf[MB_LEN_MAX + 1]; 790 int i, c; 791 wchar_t wc; 792 793 i = 0; 794 while ((c = getescc('\'')) >= 0) { 795 if (i < MB_CUR_MAX) 796 buf[i] = (char)c; 797 i++; 798 } 799 800 wc = 0; 801 802 if (c == -2) { 803 /* unterminated character constant */ 804 error(253); 805 } else if (c == 0) { 806 /* empty character constant */ 807 error(73); 808 } else { 809 if (i > MB_CUR_MAX) { 810 i = MB_CUR_MAX; 811 /* too many characters in character constant */ 812 error(71); 813 } else { 814 buf[i] = '\0'; 815 (void)mbtowc(NULL, NULL, 0); 816 if (mbtowc(&wc, buf, MB_CUR_MAX) < 0) 817 /* invalid multibyte character */ 818 error(291); 819 } 820 } 821 822 if ((yylval.y_val = calloc(1, sizeof (val_t))) == NULL) 823 nomem(); 824 yylval.y_val->v_tspec = WCHAR; 825 yylval.y_val->v_quad = wc; 826 827 return (T_CON); 828} 829 830/* 831 * Read a character which is part of a character constant or of a string 832 * and handle escapes. 833 * 834 * The Argument is the character which delimits the character constant or 835 * string. 
836 * 837 * Returns -1 if the end of the character constant or string is reached, 838 * -2 if the EOF is reached, and the character otherwise. 839 */ 840static int 841getescc(int d) 842{ 843 static int pbc = -1; 844 int n, c, v; 845 846 if (pbc == -1) { 847 c = inpc(); 848 } else { 849 c = pbc; 850 pbc = -1; 851 } 852 if (c == d) 853 return (-1); 854 switch (c) { 855 case '\n': 856 if (tflag) { 857 /* newline in string or char constant */ 858 error(254); 859 return (-2); 860 } 861 return (c); 862 case EOF: 863 return (-2); 864 case '\\': 865 switch (c = inpc()) { 866 case '"': 867 if (tflag && d == '\'') 868 /* \" inside character constant undef. ... */ 869 warning(262); 870 return ('"'); 871 case '\'': 872 return ('\''); 873 case '?': 874 if (tflag) 875 /* \? undefined in traditional C */ 876 warning(263); 877 return ('?'); 878 case '\\': 879 return ('\\'); 880 case 'a': 881 if (tflag) 882 /* \a undefined in traditional C */ 883 warning(81); 884 return ('\a'); 885 case 'b': 886 return ('\b'); 887 case 'f': 888 return ('\f'); 889 case 'n': 890 return ('\n'); 891 case 'r': 892 return ('\r'); 893 case 't': 894 return ('\t'); 895 case 'v': 896 if (tflag) 897 /* \v undefined in traditional C */ 898 warning(264); 899 return ('\v'); 900 case '8': case '9': 901 /* bad octal digit %c */ 902 warning(77, c); 903 /* FALLTHROUGH */ 904 case '0': case '1': case '2': case '3': 905 case '4': case '5': case '6': case '7': 906 n = 3; 907 v = 0; 908 do { 909 v = (v << 3) + (c - '0'); 910 c = inpc(); 911 } while (--n && isdigit(c) && (tflag || c <= '7')); 912 if (tflag && n > 0 && isdigit(c)) 913 /* bad octal digit %c */ 914 warning(77, c); 915 pbc = c; 916 if (v > UCHAR_MAX) { 917 /* character escape does not fit in char. */ 918 warning(76); 919 v &= CHAR_MASK; 920 } 921 return (v); 922 case 'x': 923 if (tflag) 924 /* \x undefined in traditional C */ 925 warning(82); 926 v = 0; 927 n = 0; 928 while ((c = inpc()) >= 0 && isxdigit(c)) { 929 c = isdigit(c) ? 
930 c - '0' : toupper(c) - 'A' + 10; 931 v = (v << 4) + c; 932 if (n >= 0) { 933 if ((v & ~CHAR_MASK) != 0) { 934 /* overflow in hex escape */ 935 warning(75); 936 n = -1; 937 } else { 938 n++; 939 } 940 } 941 } 942 pbc = c; 943 if (n == 0) { 944 /* no hex digits follow \x */ 945 error(74); 946 } if (n == -1) { 947 v &= CHAR_MASK; 948 } 949 return (v); 950 case '\n': 951 return (getescc(d)); 952 case EOF: 953 return (-2); 954 default: 955 if (isprint(c)) { 956 /* dubious escape \%c */ 957 warning(79, c); 958 } else { 959 /* dubious escape \%o */ 960 warning(80, c); 961 } 962 } 963 } 964 return (c); 965} 966 967/* 968 * Called for preprocessor directives. Currently implemented are: 969 * # lineno 970 * # lineno "filename" 971 */ 972static void 973directive(void) 974{ 975 const char *cp, *fn; 976 char c, *eptr; 977 size_t fnl; 978 long ln; 979 static int first = 1; 980 981 /* Go to first non-whitespace after # */ 982 for (cp = yytext + 1; (c = *cp) == ' ' || c == '\t'; cp++) 983 continue; 984 985 if (!isdigit((unsigned char)c)) { 986 error: 987 /* undefined or invalid # directive */ 988 warning(255); 989 return; 990 } 991 ln = strtol(--cp, &eptr, 10); 992 if (cp == eptr) 993 goto error; 994 if ((c = *(cp = eptr)) != ' ' && c != '\t' && c != '\0') 995 goto error; 996 while ((c = *cp++) == ' ' || c == '\t') 997 continue; 998 if (c != '\0') { 999 if (c != '"') 1000 goto error; 1001 fn = cp; 1002 while ((c = *cp) != '"' && c != '\0') 1003 cp++; 1004 if (c != '"') 1005 goto error; 1006 if ((fnl = cp++ - fn) > PATH_MAX) 1007 goto error; 1008 while ((c = *cp++) == ' ' || c == '\t') 1009 continue; 1010#if 0 1011 if (c != '\0') 1012 warning("extra character(s) after directive"); 1013#endif 1014 1015 /* empty string means stdin */ 1016 if (fnl == 0) { 1017 fn = "{standard input}"; 1018 fnl = 16; /* strlen (fn) */ 1019 } 1020 curr_pos.p_file = fnnalloc(fn, fnl); 1021 /* 1022 * If this is the first directive, the name is the name 1023 * of the C source file as specified at the 
command line. 1024 * It is written to the output file. 1025 */ 1026 if (first) { 1027 csrc_pos.p_file = curr_pos.p_file; 1028 outsrc(curr_pos.p_file); 1029 first = 0; 1030 } 1031 } 1032 curr_pos.p_line = (int)ln - 1; 1033 curr_pos.p_uniq = 0; 1034 if (curr_pos.p_file == csrc_pos.p_file) { 1035 csrc_pos.p_line = (int)ln - 1; 1036 csrc_pos.p_uniq = 0; 1037 } 1038} 1039 1040/* 1041 * Handle lint comments. Following comments are currently understood: 1042 * ARGSUSEDn 1043 * BITFIELDTYPE 1044 * CONSTCOND CONSTANTCOND CONSTANTCONDITION 1045 * FALLTHRU FALLTHROUGH 1046 * LINTLIBRARY 1047 * LINTED NOSTRICT 1048 * LONGLONG 1049 * NOTREACHED 1050 * PRINTFLIKEn 1051 * PROTOLIB 1052 * SCANFLIKEn 1053 * VARARGSn 1054 * If one of this comments is recognized, the arguments, if any, are 1055 * parsed and a function which handles this comment is called. 1056 */ 1057static void 1058comment(void) 1059{ 1060 int c, lc; 1061 static struct { 1062 const char *keywd; 1063 int arg; 1064 void (*func)(int); 1065 } keywtab[] = { 1066 { "ARGSUSED", 1, argsused }, 1067 { "BITFIELDTYPE", 0, bitfieldtype }, 1068 { "CONSTCOND", 0, constcond }, 1069 { "CONSTANTCOND", 0, constcond }, 1070 { "CONSTANTCONDITION", 0, constcond }, 1071 { "FALLTHRU", 0, fallthru }, 1072 { "FALLTHROUGH", 0, fallthru }, 1073 { "LINTLIBRARY", 0, lintlib }, 1074 { "LINTED", 0, linted }, 1075 { "LONGLONG", 0, longlong }, 1076 { "NOSTRICT", 0, linted }, 1077 { "NOTREACHED", 0, notreach }, 1078 { "PRINTFLIKE", 1, printflike }, 1079 { "PROTOLIB", 1, protolib }, 1080 { "SCANFLIKE", 1, scanflike }, 1081 { "VARARGS", 1, varargs }, 1082 }; 1083 char keywd[32]; 1084 char arg[32]; 1085 int l, i, a; 1086 int eoc; 1087 1088 eoc = 0; 1089 1090 /* Skip white spaces after the start of the comment */ 1091 while ((c = inpc()) != EOF && isspace(c)) 1092 continue; 1093 1094 /* Read the potential keyword to keywd */ 1095 l = 0; 1096 while (c != EOF && isupper(c) && l < sizeof (keywd) - 1) { 1097 keywd[l++] = (char)c; 1098 c = inpc(); 1099 } 
1100 keywd[l] = '\0'; 1101 1102 /* look for the keyword */ 1103 for (i = 0; i < sizeof (keywtab) / sizeof (keywtab[0]); i++) { 1104 if (strcmp(keywtab[i].keywd, keywd) == 0) 1105 break; 1106 } 1107 if (i == sizeof (keywtab) / sizeof (keywtab[0])) 1108 goto skip_rest; 1109 1110 /* skip white spaces after the keyword */ 1111 while (c != EOF && isspace(c)) 1112 c = inpc(); 1113 1114 /* read the argument, if the keyword accepts one and there is one */ 1115 l = 0; 1116 if (keywtab[i].arg) { 1117 while (c != EOF && isdigit(c) && l < sizeof (arg) - 1) { 1118 arg[l++] = (char)c; 1119 c = inpc(); 1120 } 1121 } 1122 arg[l] = '\0'; 1123 a = l != 0 ? atoi(arg) : -1; 1124 1125 /* skip white spaces after the argument */ 1126 while (c != EOF && isspace(c)) 1127 c = inpc(); 1128 1129 if (c != '*' || (c = inpc()) != '/') { 1130 if (keywtab[i].func != linted) 1131 /* extra characters in lint comment */ 1132 warning(257); 1133 } else { 1134 /* 1135 * remember that we have already found the end of the 1136 * comment 1137 */ 1138 eoc = 1; 1139 } 1140 1141 if (keywtab[i].func != NULL) 1142 (*keywtab[i].func)(a); 1143 1144 skip_rest: 1145 while (!eoc) { 1146 lc = c; 1147 if ((c = inpc()) == EOF) { 1148 /* unterminated comment */ 1149 error(256); 1150 break; 1151 } 1152 if (lc == '*' && c == '/') 1153 eoc = 1; 1154 } 1155} 1156 1157/* 1158 * Handle // style comments 1159 */ 1160static void 1161slashslashcomment(void) 1162{ 1163 int c; 1164 1165 if (!Sflag && !gflag) 1166 /* // comments only supported in C99 */ 1167 (void)gnuism(312, tflag ? "traditional" : "ANSI"); 1168 1169 while ((c = inpc()) != EOF && c != '\n') 1170 continue; 1171} 1172 1173/* 1174 * Clear flags for lint comments LINTED, LONGLONG and CONSTCOND. 1175 * clrwflgs() is called after function definitions and global and 1176 * local declarations and definitions. It is also called between 1177 * the controlling expression and the body of control statements 1178 * (if, switch, for, while). 
1179 */ 1180void 1181clrwflgs(void) 1182{ 1183 1184 nowarn = 0; 1185 quadflg = 0; 1186 ccflg = 0; 1187} 1188 1189/* 1190 * Strings are stored in a dynamically alloceted buffer and passed 1191 * in yylval.y_xstrg to the parser. The parser or the routines called 1192 * by the parser are responsible for freeing this buffer. 1193 */ 1194static int 1195string(void) 1196{ 1197 u_char *s; 1198 int c; 1199 size_t len, max; 1200 strg_t *strg; 1201 1202 if ((s = malloc(max = 64)) == NULL) 1203 nomem(); 1204 1205 len = 0; 1206 while ((c = getescc('"')) >= 0) { 1207 /* +1 to reserve space for a trailing NUL character */ 1208 if (len + 1 == max) 1209 if ((s = realloc(s, max *= 2)) == NULL) 1210 nomem(); 1211 s[len++] = (char)c; 1212 } 1213 s[len] = '\0'; 1214 if (c == -2) 1215 /* unterminated string constant */ 1216 error(258); 1217 1218 if ((strg = calloc(1, sizeof (strg_t))) == NULL) 1219 nomem(); 1220 strg->st_tspec = CHAR; 1221 strg->st_len = len; 1222 strg->st_cp = s; 1223 1224 yylval.y_strg = strg; 1225 return (T_STRING); 1226} 1227 1228static int 1229wcstrg(void) 1230{ 1231 char *s; 1232 int c, i, n, wi; 1233 size_t len, max, wlen; 1234 wchar_t *ws; 1235 strg_t *strg; 1236 1237 if ((s = malloc(max = 64)) == NULL) 1238 nomem(); 1239 len = 0; 1240 while ((c = getescc('"')) >= 0) { 1241 /* +1 to save space for a trailing NUL character */ 1242 if (len + 1 >= max) 1243 if ((s = realloc(s, max *= 2)) == NULL) 1244 nomem(); 1245 s[len++] = (char)c; 1246 } 1247 s[len] = '\0'; 1248 if (c == -2) 1249 /* unterminated string constant */ 1250 error(258); 1251 1252 /* get length of wide character string */ 1253 (void)mblen(NULL, 0); 1254 for (i = 0, wlen = 0; i < len; i += n, wlen++) { 1255 if ((n = mblen(&s[i], MB_CUR_MAX)) == -1) { 1256 /* invalid multibyte character */ 1257 error(291); 1258 break; 1259 } 1260 if (n == 0) 1261 n = 1; 1262 } 1263 1264 if ((ws = malloc((wlen + 1) * sizeof (wchar_t))) == NULL) 1265 nomem(); 1266 1267 /* convert from multibyte to wide char */ 1268 
(void)mbtowc(NULL, NULL, 0); 1269 for (i = 0, wi = 0; i < len; i += n, wi++) { 1270 if ((n = mbtowc(&ws[wi], &s[i], MB_CUR_MAX)) == -1) 1271 break; 1272 if (n == 0) 1273 n = 1; 1274 } 1275 ws[wi] = 0; 1276 free(s); 1277 1278 if ((strg = calloc(1, sizeof (strg_t))) == NULL) 1279 nomem(); 1280 strg->st_tspec = WCHAR; 1281 strg->st_len = wlen; 1282 strg->st_wcp = ws; 1283 1284 yylval.y_strg = strg; 1285 return (T_STRING); 1286} 1287 1288/* 1289 * As noted above the scanner does not create new symbol table entries 1290 * for symbols it cannot find in the symbol table. This is to avoid 1291 * putting undeclared symbols into the symbol table if a syntax error 1292 * occurs. 1293 * 1294 * getsym() is called as soon as it is probably ok to put the symbol to 1295 * the symbol table. This does not mean that it is not possible that 1296 * symbols are put to the symbol table which are than not completely 1297 * declared due to syntax errors. To avoid too many problems in this 1298 * case symbols get type int in getsym(). 1299 * 1300 * XXX calls to getsym() should be delayed until decl1*() is called 1301 */ 1302sym_t * 1303getsym(sbuf_t *sb) 1304{ 1305 dinfo_t *di; 1306 char *s; 1307 sym_t *sym; 1308 1309 sym = sb->sb_sym; 1310 1311 /* 1312 * During member declaration it is possible that name() looked 1313 * for symbols of type FVFT, although it should have looked for 1314 * symbols of type FTAG. Same can happen for labels. Both cases 1315 * are compensated here. 
1316 */ 1317 if (symtyp == FMOS || symtyp == FLAB) { 1318 if (sym == NULL || sym->s_kind == FVFT) 1319 sym = search(sb); 1320 } 1321 1322 if (sym != NULL) { 1323 if (sym->s_kind != symtyp) 1324 LERROR("storesym()"); 1325 symtyp = FVFT; 1326 freesb(sb); 1327 return (sym); 1328 } 1329 1330 /* create a new symbol table entry */ 1331 1332 /* labels must always be allocated at level 1 (outhermost block) */ 1333 if (symtyp == FLAB) { 1334 sym = getlblk(1, sizeof (sym_t)); 1335 s = getlblk(1, sb->sb_len + 1); 1336 (void)memcpy(s, sb->sb_name, sb->sb_len + 1); 1337 sym->s_name = s; 1338 sym->s_blklev = 1; 1339 di = dcs; 1340 while (di->d_nxt != NULL && di->d_nxt->d_nxt != NULL) 1341 di = di->d_nxt; 1342 if (di->d_ctx != AUTO) 1343 LERROR("storesym()"); 1344 } else { 1345 sym = getblk(sizeof (sym_t)); 1346 sym->s_name = sb->sb_name; 1347 sym->s_blklev = blklev; 1348 di = dcs; 1349 } 1350 1351 UNIQUE_CURR_POS(sym->s_dpos); 1352 if ((sym->s_kind = symtyp) != FLAB) 1353 sym->s_type = gettyp(INT); 1354 1355 symtyp = FVFT; 1356 1357 if ((sym->s_link = symtab[sb->sb_hash]) != NULL) 1358 symtab[sb->sb_hash]->s_rlink = &sym->s_link; 1359 (symtab[sb->sb_hash] = sym)->s_rlink = &symtab[sb->sb_hash]; 1360 1361 *di->d_ldlsym = sym; 1362 di->d_ldlsym = &sym->s_dlnxt; 1363 1364 freesb(sb); 1365 return (sym); 1366} 1367 1368/* 1369 * Construct a temporary symbol. The symbol starts with a digit, so that 1370 * it is illegal. 
1371 */ 1372sym_t * 1373mktempsym(type_t *t) 1374{ 1375 static int n = 0; 1376 int h; 1377 char *s = getlblk(blklev, 64); 1378 sym_t *sym = getblk(sizeof (sym_t)); 1379 1380 (void)snprintf(s, 64, "%.8d_tmp", n++); 1381 h = hash(s); 1382 1383 sym->s_name = s; 1384 sym->s_type = t; 1385 sym->s_blklev = blklev; 1386 sym->s_scl = AUTO; 1387 sym->s_kind = FVFT; 1388 sym->s_used = 1; 1389 sym->s_set = 1; 1390 1391 if ((sym->s_link = symtab[h]) != NULL) 1392 symtab[h]->s_rlink = &sym->s_link; 1393 (symtab[h] = sym)->s_rlink = &symtab[h]; 1394 1395 *dcs->d_ldlsym = sym; 1396 dcs->d_ldlsym = &sym->s_dlnxt; 1397 1398 return sym; 1399} 1400 1401/* 1402 * Remove a symbol forever from the symbol table. s_blklev 1403 * is set to -1 to avoid that the symbol will later be put 1404 * back to the symbol table. 1405 */ 1406void 1407rmsym(sym_t *sym) 1408{ 1409 1410 if ((*sym->s_rlink = sym->s_link) != NULL) 1411 sym->s_link->s_rlink = sym->s_rlink; 1412 sym->s_blklev = -1; 1413 sym->s_link = NULL; 1414} 1415 1416/* 1417 * Remove a list of symbols declared at one level from the symbol 1418 * table. 1419 */ 1420void 1421rmsyms(sym_t *syms) 1422{ 1423 sym_t *sym; 1424 1425 for (sym = syms; sym != NULL; sym = sym->s_dlnxt) { 1426 if (sym->s_blklev != -1) { 1427 if ((*sym->s_rlink = sym->s_link) != NULL) 1428 sym->s_link->s_rlink = sym->s_rlink; 1429 sym->s_link = NULL; 1430 sym->s_rlink = NULL; 1431 } 1432 } 1433} 1434 1435/* 1436 * Put a symbol into the symbol table 1437 */ 1438void 1439inssym(int bl, sym_t *sym) 1440{ 1441 int h; 1442 1443 h = hash(sym->s_name); 1444 if ((sym->s_link = symtab[h]) != NULL) 1445 symtab[h]->s_rlink = &sym->s_link; 1446 (symtab[h] = sym)->s_rlink = &symtab[h]; 1447 sym->s_blklev = bl; 1448 if (sym->s_link != NULL && sym->s_blklev < sym->s_link->s_blklev) 1449 LERROR("inssym()"); 1450} 1451 1452/* 1453 * Called at level 0 after syntax errors 1454 * Removes all symbols which are not declared at level 0 from the 1455 * symbol table. 
Also frees all memory which is not associated with 1456 * level 0. 1457 */ 1458void 1459cleanup(void) 1460{ 1461 sym_t *sym, *nsym; 1462 int i; 1463 1464 for (i = 0; i < HSHSIZ1; i++) { 1465 for (sym = symtab[i]; sym != NULL; sym = nsym) { 1466 nsym = sym->s_link; 1467 if (sym->s_blklev >= 1) { 1468 if ((*sym->s_rlink = nsym) != NULL) 1469 nsym->s_rlink = sym->s_rlink; 1470 } 1471 } 1472 } 1473 1474 for (i = mblklev; i > 0; i--) 1475 freelblk(i); 1476} 1477 1478/* 1479 * Create a new symbol with the name of an existing symbol. 1480 */ 1481sym_t * 1482pushdown(sym_t *sym) 1483{ 1484 int h; 1485 sym_t *nsym; 1486 1487 h = hash(sym->s_name); 1488 nsym = getblk(sizeof (sym_t)); 1489 if (sym->s_blklev > blklev) 1490 LERROR("pushdown()"); 1491 nsym->s_name = sym->s_name; 1492 UNIQUE_CURR_POS(nsym->s_dpos); 1493 nsym->s_kind = sym->s_kind; 1494 nsym->s_blklev = blklev; 1495 1496 if ((nsym->s_link = symtab[h]) != NULL) 1497 symtab[h]->s_rlink = &nsym->s_link; 1498 (symtab[h] = nsym)->s_rlink = &symtab[h]; 1499 1500 *dcs->d_ldlsym = nsym; 1501 dcs->d_ldlsym = &nsym->s_dlnxt; 1502 1503 return (nsym); 1504} 1505 1506/* 1507 * Free any dynamically allocated memory referenced by 1508 * the value stack or yylval. 1509 * The type of information in yylval is described by tok. 1510 */ 1511void 1512freeyyv(void *sp, int tok) 1513{ 1514 if (tok == T_NAME || tok == T_TYPENAME) { 1515 sbuf_t *sb = *(sbuf_t **)sp; 1516 freesb(sb); 1517 } else if (tok == T_CON) { 1518 val_t *val = *(val_t **)sp; 1519 free(val); 1520 } else if (tok == T_STRING) { 1521 strg_t *strg = *(strg_t **)sp; 1522 if (strg->st_tspec == CHAR) { 1523 free(strg->st_cp); 1524 } else if (strg->st_tspec == WCHAR) { 1525 free(strg->st_wcp); 1526 } else { 1527 LERROR("fryylv()"); 1528 } 1529 free(strg); 1530 } 1531} 1532