%{
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/
%}

%{
#pragma ident	"%Z%%M% %I% %E% SMI"
%}

%Start A str sc reg comment

%{
/*
 * Lexical analyzer for awk.
 *
 * Start conditions used by the rules below:
 *	A	ordinary program text (operators, keywords, names, numbers)
 *	str	inside a double-quoted string; bytes are collected in cbuf
 *	reg	inside a regular expression, entered via startreg()
 *	sc	set after a closing brace: the "}" rule returns ';' first,
 *		and the next call to yylex() returns '}' from the switch at
 *		the top of the rules section
 *
 * NOTE(review): the <sys/types.h> include below (needed for off_t) and the
 * start-condition prefixes on the rules were reconstructed; confirm them
 * against the revision-controlled copy of this file.
 */
#include	<sys/types.h>
#include	"awk.h"
#include	"y.tab.h"

#undef	input	/* defeat lex */
#undef	unput

static void unput(int);
static void unputstr(char *);

extern YYSTYPE	yylval;
extern int	infunc;

off_t	lineno	= 1;
int	bracecnt = 0;
int	brackcnt = 0;
int	parencnt = 0;

#define	DEBUG
#ifdef	DEBUG
#	define	RET(x)	{if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
#else
#	define	RET(x)	return(x)
#endif

/*
 * The standards (SUSV2) require that the record size be at least LINE_MAX.
 * LINE_MAX is a standard variable defined in limits.h.
 * Though nawk is not standards compliant, we let RECSIZE
 * grow with LINE_MAX instead of the magic number 1024.
 */
#define	CBUFLEN	(3 * LINE_MAX)

/*
 * Append one byte to cbuf.  Once the buffer is nearly full a syntax error is
 * reported and the scanner drops back to state A.  Stopping at CBUFLEN-1
 * leaves room for the two bytes the closing-quote rule writes at cbuf[clen]
 * and cbuf[clen+1].  Every rule that stores into cbuf goes through this
 * macro so that escape sequences are bounded just like ordinary characters.
 */
#define	CPUT(c)	do { \
		cbuf[clen++] = (c); \
		if (clen >= CBUFLEN-1) { \
			ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
			BEGIN A; \
		} \
	} while (0)

/* Append the current single-character token to cbuf. */
#define	CADD	CPUT(yytext[0])

static uchar	cbuf[CBUFLEN];
static uchar	*s;
static int	clen, cflag;
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]

%%
	switch (yybgin-yysvec-1) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');
	}

<A>\n		{ lineno++; RET(NL); }
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }
<A>"^"		{ RET(POWER); }

<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
<A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }
<A>"$"{A}{B}*	{ int c;
		  /* peek at the character that follows the name */
		  c = input(); unput(c);
		  if (c == '(' || c == '[' ||
		      (infunc && isarg(yytext+1) >= 0)) {
			/* push the name back so it is rescanned after '$' */
			unputstr(yytext+1);
			return(INDIRECT);
		  } else {
			yylval.cp = setsymtab((uchar *)yytext+1, (uchar *)"",0.0,STR|NUM,symtab);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{ yylval.cp = setsymtab((uchar *)yytext, (uchar *)"", 0.0, NUM, symtab); RET(VARNF); }

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  yylval.cp = setsymtab((uchar *)yytext, tostring((uchar *)yytext), atof(yytext), CON|NUM, symtab);
		  RET(NUMBER); }

<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }

<A>{A}{B}*	{ int n, c;
		  c = input(); unput(c);	/* look for '(' */
		  if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
			yylval.i = n;
			RET(ARG);
		  } else {
			yylval.cp = setsymtab((uchar *)yytext, (uchar *)"",0.0,STR|NUM,symtab);
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		  }
		}
<A>\"		{ BEGIN str; clen = 0; }

<A>"}"		{ if (--bracecnt < 0)
			ERROR "extra }" SYNTAX;
		  BEGIN sc; RET(';');
		}
<A>"]"		{ if (--brackcnt < 0)
			ERROR "extra ]" SYNTAX;
		  RET(']');
		}
<A>")"		{ if (--parencnt < 0)
			ERROR "extra )" SYNTAX;
		  RET(')');
		}

<A>.		{ if (yytext[0] == '{')
			bracecnt++;
		  else if (yytext[0] == '[')
			brackcnt++;
		  else if (yytext[0] == '(')
			parencnt++;
		  RET(yylval.i = yytext[0]);	/* everything else */
		}

<reg>\\.	{ /*
		   * Keep the backslash with the escaped character.  The
		   * first store is always in bounds while in this state;
		   * CPUT performs the overflow check for the pair.
		   */
		  cbuf[clen++] = '\\'; CPUT(yytext[1]); }
<reg>\n		{ ERROR "newline in regular expression %.10s...", cbuf SYNTAX;
		  lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  /* the symbol-table name is the text plus a trailing blank */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ ERROR "newline in string %.10s...", cbuf SYNTAX;
		  lineno++; BEGIN A; }
<str>"\\\""	{ CPUT('"'); }
<str>"\\"n	{ CPUT('\n'); }
<str>"\\"t	{ CPUT('\t'); }
<str>"\\"f	{ CPUT('\f'); }
<str>"\\"r	{ CPUT('\r'); }
<str>"\\"b	{ CPUT('\b'); }
<str>"\\"v	{ CPUT('\v'); }	/* these ANSIisms may not be known by */
<str>"\\"a	{ CPUT('\007'); }	/* your compiler. hence 007 for bell */
<str>"\\\\"	{ CPUT('\\'); }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{ unsigned int n = 0;
		  /* %o stores through an unsigned int pointer */
		  (void) sscanf(yytext+1, "%o", &n); CPUT((uchar)n); }
<str>"\\"x({H}+)	{ unsigned int n = 0;	/* ANSI permits any number! */
		  (void) sscanf(yytext+2, "%x", &n); CPUT((uchar)n); }
<str>"\\".	{ CPUT(yytext[1]); }
<str>.		{ CADD; }

%%

/*
 * Switch the scanner into regular-expression state and start collecting
 * a new regular expression at the beginning of cbuf.
 */
void
startreg(void)
{
	BEGIN reg;
	clen = 0;
}

/*
 * input() and unput() are transcriptions of the standard lex
 * macros for input and output with additions for error message
 * printing.  God help us all if someone changes how lex works.
 */

/* Ring buffer of the most recently read characters; ep is the next slot. */
uchar	ebuf[300];
uchar	*ep = ebuf;

/*
 * Return the next input character: pushed-back characters first, then the
 * in-memory program text (lexprog) if there is one, otherwise pgetc().
 * End of input is returned as 0.  Each character returned is also recorded
 * in ebuf.
 */
int
input(void)
{
	register int c;
	extern uchar *lexprog;

	if (yysptr > yysbuf) {
		c = U(*--yysptr);
	} else if (lexprog != NULL) {	/* awk '...' */
		/* stay on the terminating NUL so repeated calls keep returning 0 */
		if ((c = *lexprog) != 0)
			lexprog++;
	} else {			/* awk -f ... */
		c = pgetc();
	}
	if (c == '\n')
		yylineno++;
	else if (c == EOF)
		c = 0;
	if (ep >= ebuf + sizeof (ebuf))
		ep = ebuf;
	return (*ep++ = c);
}

/*
 * Push character c back onto the lex pushback stack and step the ebuf
 * cursor back by one, wrapping to the last slot when it is at the start.
 */
static void
unput(int c)
{
	yytchar = c;
	if (yytchar == '\n')
		yylineno--;
	*yysptr++ = yytchar;
	/* test before decrementing so ep never points before ebuf */
	if (ep == ebuf)
		ep = ebuf + sizeof (ebuf) - 1;
	else
		ep--;
}

/*
 * Push the string s back onto the input, last character first, so that
 * it is read again in its original order.
 */
static void
unputstr(char *s)
{
	size_t i = strlen(s);

	while (i > 0)
		unput(s[--i]);
}