bt_parse.y revision 1.42
1/*	$OpenBSD: bt_parse.y,v 1.42 2021/09/09 09:53:11 mpi Exp $	*/
2
3/*
4 * Copyright (c) 2019-2021 Martin Pieuchot <mpi@openbsd.org>
5 * Copyright (c) 2019 Tobias Heider <tobhe@openbsd.org>
6 * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org>
7 *
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 */
20
21/*
22 * B tracing language parser.
23 *
24 * The dialect of the language understood by this parser aims to be
25 * compatible with the one understood by bpftrace(8), see:
26 *
27 * https://github.com/iovisor/bpftrace/blob/master/docs/reference_guide.md
28 *
29 */
30
31%{
32#include <sys/queue.h>
33
34#include <assert.h>
35#include <ctype.h>
36#include <err.h>
37#include <limits.h>
38#include <stdarg.h>
39#include <stdint.h>
40#include <stdio.h>
41
42#include "bt_parser.h"
43
44/* Name for the default map @[], hopefully nobody will use this one ;) */
45#define UNNAMED_MAP	"___unnamed_map_doesnt_have_any_name"
46
47/* Number of rules to evaluate. */
48struct bt_ruleq		g_rules = TAILQ_HEAD_INITIALIZER(g_rules);
49
50/* Number of probes except BEGIN/END. */
51int		 	g_nprobes;
52
53/* List of global variables, including maps. */
54SLIST_HEAD(, bt_var)	 g_variables;
55
56/* List of local variables, cleaned for each new rule. */
57SLIST_HEAD(, bt_var)	l_variables;
58
59struct bt_arg 		g_nullba = BA_INITIALIZER(0, B_AT_LONG);
60struct bt_arg		g_maxba = BA_INITIALIZER(LONG_MAX, B_AT_LONG);
61
62struct bt_rule	*br_new(struct bt_probe *, struct bt_filter *,
63		     struct bt_stmt *);
64struct bt_probe	*bp_new(const char *, const char *, const char *, int32_t);
65struct bt_arg	*ba_append(struct bt_arg *, struct bt_arg *);
66struct bt_arg	*ba_op(enum bt_argtype, struct bt_arg *, struct bt_arg *);
67struct bt_stmt	*bs_new(enum bt_action, struct bt_arg *, struct bt_var *);
68struct bt_stmt	*bs_append(struct bt_stmt *, struct bt_stmt *);
69
70struct bt_var	*bg_lookup(const char *);
71struct bt_stmt	*bg_store(const char *, struct bt_arg *);
72struct bt_arg	*bg_find(const char *);
73struct bt_var	*bg_get(const char *);
74
75struct bt_var	*bl_lookup(const char *);
76struct bt_stmt	*bl_store(const char *, struct bt_arg *);
77struct bt_arg	*bl_find(const char *);
78
79struct bt_arg	*bm_find(const char *, struct bt_arg *);
80struct bt_stmt	*bm_insert(const char *, struct bt_arg *, struct bt_arg *);
81struct bt_stmt	*bm_op(enum bt_action, struct bt_arg *, struct bt_arg *);
82
83struct bt_stmt	*bh_inc(const char *, struct bt_arg *, struct bt_arg *);
84
85/*
86 * Lexer
87 */
88const char	*pbuf;
89size_t		 plen;
90size_t		 pindex;
91int		 perrors = 0;
92
93typedef struct {
94	union {
95		long			 number;
96		int			 i;
97		const char		*string;
98		struct bt_probe		*probe;
99		struct bt_filter	*filter;
100		struct bt_stmt		*stmt;
101		struct bt_arg		*arg;
102	} v;
103	const char			*filename;
104	int				 lineno;
105	int				 colno;
106} yystype;
107#define YYSTYPE yystype
108
109static void	 yyerror(const char *, ...);
110static int	 yylex(void);
111
112static int pflag;
113%}
114
115%token	<v.i>		ERROR ENDFILT
116%token	<v.i>		OP_EQ OP_NE OP_LE OP_LT OP_GE OP_GT OP_LAND OP_LOR
117/* Builtins */
118%token	<v.i>		BUILTIN BEGIN END HZ IF
119/* Functions and Map operators */
120%token  <v.i>		F_DELETE F_PRINT
121%token	<v.i>		MFUNC FUNC0 FUNC1 FUNCN OP1 OP4 MOP0 MOP1
122%token	<v.string>	STRING CSTRING
123%token	<v.number>	NUMBER
124
125%type	<v.string>	gvar lvar
126%type	<v.i>		beginend
127%type	<v.probe>	plist probe pname
128%type	<v.filter>	filter
129%type	<v.stmt>	action stmt stmtblck stmtlist block
130%type	<v.arg>		pat vargs mentry mpat pargs staticv
131%type	<v.arg>		expr term fterm variable factor
132%%
133
134grammar	: /* empty */
135	| grammar '\n'
136	| grammar rule
137	| grammar error
138	;
139
140rule	: plist filter action		{ br_new($1, $2, $3); }
141	;
142
143beginend: BEGIN	| END ;
144
145plist	: plist ',' probe		{ $$ = bp_append($1, $3); }
146	| probe
147	;
148
149probe	: { pflag = 1; } pname		{ $$ = $2; pflag = 0; }
150	| beginend			{ $$ = bp_new(NULL, NULL, NULL, $1); }
151	;
152
153pname	: STRING ':' STRING ':' STRING	{ $$ = bp_new($1, $3, $5, 0); }
154	| STRING ':' HZ ':' NUMBER	{ $$ = bp_new($1, "hz", NULL, $5); }
155	;
156
157staticv	: '$' NUMBER			{ $$ = get_varg($2); }
158	| '$' '#'			{ $$ = get_nargs(); }
159	;
160
161gvar	: '@' STRING			{ $$ = $2; }
162	| '@'				{ $$ = UNNAMED_MAP; }
163	;
164
165lvar	: '$' STRING			{ $$ = $2; }
166	;
167
168mentry	: gvar '[' vargs ']'		{ $$ = bm_find($1, $3); }
169	;
170
171mpat	: MOP0 '(' ')'			{ $$ = ba_new(NULL, $1); }
172	| MOP1 '(' pat ')'		{ $$ = ba_new($3, $1); }
173	| pat
174	;
175
176pat	: CSTRING			{ $$ = ba_new($1, B_AT_STR); }
177	| expr
178	;
179
180filter	: /* empty */			{ $$ = NULL; }
181	| '/' expr ENDFILT		{ $$ = bc_new(NULL, B_AT_OP_NE, $2); }
182	;
183
184/*
185 * Give higher precedence to:
186 *  1. && and ||
187 *  2. ==, !=, <<, <, >=, >, +, =, &, ^, |
188 *  3. * and /
189 */
190expr	: expr OP_LAND term	{ $$ = ba_op(B_AT_OP_LAND, $1, $3); }
191	| expr OP_LOR term	{ $$ = ba_op(B_AT_OP_LOR, $1, $3); }
192	| term
193	;
194
195term	: term OP_EQ fterm	{ $$ = ba_op(B_AT_OP_EQ, $1, $3); }
196	| term OP_NE fterm	{ $$ = ba_op(B_AT_OP_NE, $1, $3); }
197	| term OP_LE fterm	{ $$ = ba_op(B_AT_OP_LE, $1, $3); }
198	| term OP_LT fterm	{ $$ = ba_op(B_AT_OP_LT, $1, $3); }
199	| term OP_GE fterm	{ $$ = ba_op(B_AT_OP_GE, $1, $3); }
200	| term OP_GT fterm	{ $$ = ba_op(B_AT_OP_GT, $1, $3); }
201	| term '+' fterm	{ $$ = ba_op(B_AT_OP_PLUS, $1, $3); }
202	| term '-' fterm	{ $$ = ba_op(B_AT_OP_MINUS, $1, $3); }
203	| term '&' fterm	{ $$ = ba_op(B_AT_OP_BAND, $1, $3); }
204	| term '^' fterm	{ $$ = ba_op(B_AT_OP_XOR, $1, $3); }
205	| term '|' fterm	{ $$ = ba_op(B_AT_OP_BOR, $1, $3); }
206	| fterm
207	;
208
209fterm	: fterm '*' factor	{ $$ = ba_op(B_AT_OP_MULT, $1, $3); }
210	| fterm '/' factor	{ $$ = ba_op(B_AT_OP_DIVIDE, $1, $3); }
211	| factor
212	;
213
214variable: lvar			{ $$ = bl_find($1); }
215	| gvar			{ $$ = bg_find($1); }
216	;
217
218factor : '(' expr ')'		{ $$ = $2; }
219	| NUMBER		{ $$ = ba_new($1, B_AT_LONG); }
220	| BUILTIN		{ $$ = ba_new(NULL, $1); }
221	| staticv
222	| variable
223	| mentry
224	;
225
226
227vargs	: pat
228	| vargs ',' pat			{ $$ = ba_append($1, $3); }
229	;
230
231pargs	: expr
232	| gvar ',' pat			{ $$ = ba_append(bg_find($1), $3); }
233	;
234
235NL	: /* empty */
236	| '\n'
237	;
238
239stmt	: ';' NL			{ $$ = NULL; }
240	| gvar '=' pat			{ $$ = bg_store($1, $3); }
241	| lvar '=' pat			{ $$ = bl_store($1, $3); }
242	| gvar '[' vargs ']' '=' mpat	{ $$ = bm_insert($1, $3, $6); }
243	| FUNCN '(' vargs ')'		{ $$ = bs_new($1, $3, NULL); }
244	| FUNC1 '(' pat ')'		{ $$ = bs_new($1, $3, NULL); }
245	| MFUNC '(' variable ')'	{ $$ = bs_new($1, $3, NULL); }
246	| FUNC0 '(' ')'			{ $$ = bs_new($1, NULL, NULL); }
247	| F_DELETE '(' mentry ')'	{ $$ = bm_op($1, $3, NULL); }
248	| F_PRINT '(' pargs ')'		{ $$ = bs_new($1, $3, NULL); }
249	| gvar '=' OP1 '(' pat ')'	{ $$ = bh_inc($1, $5, NULL); }
250	| gvar '=' OP4 '(' pat ',' vargs ')'	{ $$ = bh_inc($1, $5, $7); }
251	;
252
253stmtblck: IF '(' expr ')' block			{ $$ = bt_new($3, $5); }
254	;
255
256stmtlist: stmtlist stmtblck		{ $$ = bs_append($1, $2); }
257	| stmtlist stmt			{ $$ = bs_append($1, $2); }
258	| stmtblck
259	| stmt
260	;
261
262block	: '{' stmt ';' '}'			{ $$ = $2; }
263	| stmt ';'
264	;
265
266action	: '{' stmtlist '}'		{ $$ = $2; }
267	;
268
269%%
270
271struct bt_arg*
272get_varg(int index)
273{
274	extern int nargs;
275	extern char **vargs;
276	const char *errstr = NULL;
277	long val;
278
279	if (0 < index && index <= nargs) {
280		val = (long)strtonum(vargs[index-1], LONG_MIN, LONG_MAX,
281		    &errstr);
282		if (errstr == NULL)
283			return ba_new(val, B_AT_LONG);
284		return ba_new(vargs[index-1], B_AT_STR);
285	}
286
287	return ba_new(0L, B_AT_NIL);
288}
289
290struct bt_arg*
291get_nargs(void)
292{
293	extern int nargs;
294
295	return ba_new((long) nargs, B_AT_LONG);
296}
297
298/* Create a new rule, representing  "probe / filter / { action }" */
299struct bt_rule *
300br_new(struct bt_probe *probe, struct bt_filter *filter, struct bt_stmt *head)
301{
302	struct bt_rule *br;
303
304	br = calloc(1, sizeof(*br));
305	if (br == NULL)
306		err(1, "bt_rule: calloc");
307	/* SLIST_INSERT_HEAD() nullify the next pointer. */
308	SLIST_FIRST(&br->br_probes) = probe;
309	br->br_filter = filter;
310	/* SLIST_INSERT_HEAD() nullify the next pointer. */
311	SLIST_FIRST(&br->br_action) = head;
312
313	SLIST_FIRST(&br->br_variables) = SLIST_FIRST(&l_variables);
314	SLIST_INIT(&l_variables);
315
316	do {
317		if (probe->bp_type != B_PT_PROBE)
318			continue;
319		g_nprobes++;
320	} while ((probe = SLIST_NEXT(probe, bp_next)) != NULL);
321
322	TAILQ_INSERT_TAIL(&g_rules, br, br_next);
323
324	return br;
325}
326
327/* Create a new condition */
328struct bt_filter *
329bc_new(struct bt_arg *term, enum bt_argtype op, struct bt_arg *ba)
330{
331	struct bt_filter *bf;
332
333	bf = calloc(1, sizeof(*bf));
334	if (bf == NULL)
335		err(1, "bt_filter: calloc");
336
337	bf->bf_condition = bs_new(B_AC_TEST, ba_op(op, term, ba), NULL);
338
339	return bf;
340}
341
342/* Create a new if/else test */
343struct bt_stmt *
344bt_new(struct bt_arg *ba, struct bt_stmt *condbs)
345{
346	struct bt_arg *bop;
347
348	bop = ba_op(B_AT_OP_NE, NULL, ba);
349
350	return bs_new(B_AC_TEST, bop, (struct bt_var *)condbs);
351
352}
353/* Create a new probe */
354struct bt_probe *
355bp_new(const char *prov, const char *func, const char *name, int32_t rate)
356{
357	struct bt_probe *bp;
358	enum bt_ptype ptype;
359
360	if (rate < 0 || rate > INT32_MAX)
361		errx(1, "only positive values permitted");
362
363	if (prov == NULL && func == NULL && name == NULL)
364		ptype = rate; /* BEGIN or END */
365	else
366		ptype = B_PT_PROBE;
367
368	bp = calloc(1, sizeof(*bp));
369	if (bp == NULL)
370		err(1, "bt_probe: calloc");
371	bp->bp_prov = prov;
372	bp->bp_func = func;
373	bp->bp_name = name;
374	bp->bp_rate = rate;
375	bp->bp_type = ptype;
376
377	return bp;
378}
379
380/*
381 * Link two probes together, to build a probe list attached to
382 * a single action.
383 */
384struct bt_probe *
385bp_append(struct bt_probe *bp0, struct bt_probe *bp1)
386{
387	struct bt_probe *bp = bp0;
388
389	assert(bp1 != NULL);
390
391	if (bp0 == NULL)
392		return bp1;
393
394	while (SLIST_NEXT(bp, bp_next) != NULL)
395		bp = SLIST_NEXT(bp, bp_next);
396
397	SLIST_INSERT_AFTER(bp, bp1, bp_next);
398
399	return bp0;
400}
401
402/* Create a new argument */
403struct bt_arg *
404ba_new0(void *val, enum bt_argtype type)
405{
406	struct bt_arg *ba;
407
408	ba = calloc(1, sizeof(*ba));
409	if (ba == NULL)
410		err(1, "bt_arg: calloc");
411	ba->ba_value = val;
412	ba->ba_type = type;
413
414	return ba;
415}
416
417/*
418 * Link two arguments together, to build an argument list used in
419 * function calls.
420 */
421struct bt_arg *
422ba_append(struct bt_arg *da0, struct bt_arg *da1)
423{
424	struct bt_arg *ba = da0;
425
426	assert(da1 != NULL);
427
428	if (da0 == NULL)
429		return da1;
430
431	while (SLIST_NEXT(ba, ba_next) != NULL)
432		ba = SLIST_NEXT(ba, ba_next);
433
434	SLIST_INSERT_AFTER(ba, da1, ba_next);
435
436	return da0;
437}
438
439/* Create an operator argument */
440struct bt_arg *
441ba_op(enum bt_argtype op, struct bt_arg *da0, struct bt_arg *da1)
442{
443	return ba_new(ba_append(da0, da1), op);
444}
445
446/* Create a new statement: function call or assignment. */
447struct bt_stmt *
448bs_new(enum bt_action act, struct bt_arg *head, struct bt_var *var)
449{
450	struct bt_stmt *bs;
451
452	bs = calloc(1, sizeof(*bs));
453	if (bs == NULL)
454		err(1, "bt_stmt: calloc");
455	bs->bs_act = act;
456	bs->bs_var = var;
457	/* SLIST_INSERT_HEAD() nullify the next pointer. */
458	SLIST_FIRST(&bs->bs_args) = head;
459
460	return bs;
461}
462
463/* Link two statements together, to build an 'action'. */
464struct bt_stmt *
465bs_append(struct bt_stmt *ds0, struct bt_stmt *ds1)
466{
467	struct bt_stmt *bs = ds0;
468
469	if (ds0 == NULL)
470		return ds1;
471
472	if (ds1 == NULL)
473		return ds0;
474
475	while (SLIST_NEXT(bs, bs_next) != NULL)
476		bs = SLIST_NEXT(bs, bs_next);
477
478	SLIST_INSERT_AFTER(bs, ds1, bs_next);
479
480	return ds0;
481}
482
483const char *
484bv_name(struct bt_var *bv)
485{
486	if (strncmp(bv->bv_name, UNNAMED_MAP, strlen(UNNAMED_MAP)) == 0)
487		return "";
488	return bv->bv_name;
489}
490
491/* Allocate a variable. */
492struct bt_var *
493bv_new(const char *vname)
494{
495	struct bt_var *bv;
496
497	bv = calloc(1, sizeof(*bv));
498	if (bv == NULL)
499		err(1, "bt_var: calloc");
500	bv->bv_name = vname;
501
502	return bv;
503}
504
505/* Return the global variable corresponding to `vname'. */
506struct bt_var *
507bg_lookup(const char *vname)
508{
509	struct bt_var *bv;
510
511	SLIST_FOREACH(bv, &g_variables, bv_next) {
512		if (strcmp(vname, bv->bv_name) == 0)
513			break;
514	}
515
516	return bv;
517}
518
519/* Find or allocate a global variable corresponding to `vname' */
520struct bt_var *
521bg_get(const char *vname)
522{
523	struct bt_var *bv;
524
525	bv = bg_lookup(vname);
526	if (bv == NULL) {
527		bv = bv_new(vname);
528		SLIST_INSERT_HEAD(&g_variables, bv, bv_next);
529	}
530
531	return bv;
532}
533
534/* Create an "argument" that points to an existing untyped variable. */
535struct bt_arg *
536bg_find(const char *vname)
537{
538	struct bt_var *bv;
539
540	bv = bg_lookup(vname);
541	if (bv == NULL)
542		yyerror("variable '%s' accessed before being set", vname);
543
544	return ba_new(bv, B_AT_VAR);
545}
546
547/* Create a 'store' statement to assign a value to a global variable. */
548struct bt_stmt *
549bg_store(const char *vname, struct bt_arg *vval)
550{
551	return bs_new(B_AC_STORE, vval, bg_get(vname));
552}
553
554/* Return the local variable corresponding to `vname'. */
555struct bt_var *
556bl_lookup(const char *vname)
557{
558	struct bt_var *bv;
559
560	SLIST_FOREACH(bv, &l_variables, bv_next) {
561		if (strcmp(vname, bv->bv_name) == 0)
562			break;
563	}
564
565	return bv;
566}
567
568/* Find or create a local variable corresponding to `vname' */
569struct bt_arg *
570bl_find(const char *vname)
571{
572	struct bt_var *bv;
573
574	bv = bl_lookup(vname);
575	if (bv == NULL) {
576		bv = bv_new(vname);
577		SLIST_INSERT_HEAD(&l_variables, bv, bv_next);
578	}
579
580	return ba_new(bv, B_AT_VAR);
581}
582
583/* Create a 'store' statement to assign a value to a local variable. */
584struct bt_stmt *
585bl_store(const char *vname, struct bt_arg *vval)
586{
587	struct bt_var *bv;
588
589	bv = bl_lookup(vname);
590	if (bv == NULL) {
591		bv = bv_new(vname);
592		SLIST_INSERT_HEAD(&l_variables, bv, bv_next);
593	}
594
595	return bs_new(B_AC_STORE, vval, bv);
596}
597
598struct bt_stmt *
599bm_op(enum bt_action mact, struct bt_arg *ba, struct bt_arg *mval)
600{
601	return bs_new(mact, ba, (struct bt_var *)mval);
602}
603
604/* Create a 'map store' statement to assign a value to a map entry. */
605struct bt_stmt *
606bm_insert(const char *mname, struct bt_arg *mkey, struct bt_arg *mval)
607{
608	struct bt_arg *ba;
609
610	ba = ba_new(bg_get(mname), B_AT_MAP);
611	ba->ba_key = mkey;
612
613	return bs_new(B_AC_INSERT, ba, (struct bt_var *)mval);
614}
615
616/* Create an argument that points to a variable and attach a key to it. */
617struct bt_arg *
618bm_find(const char *vname, struct bt_arg *mkey)
619{
620	struct bt_var *bv;
621	struct bt_arg *ba;
622
623	bv = bg_lookup(vname);
624	if (bv == NULL)
625		yyerror("variable '%s' accessed before being set", vname);
626
627	ba = ba_new(bv, B_AT_MAP);
628	ba->ba_key = mkey;
629	return ba;
630}
631
632/*
633 * Histograms implemented using associative arrays (maps).  In the case
634 * of linear histograms `ba_key' points to a list of (min, max, step)
635 * necessary to "bucketize" any value.
636 */
637struct bt_stmt *
638bh_inc(const char *hname, struct bt_arg *hval, struct bt_arg *hrange)
639{
640	struct bt_arg *ba;
641
642	if (hrange == NULL) {
643		/* Power-of-2 histogram */
644	} else {
645		long min = 0, max;
646		int count = 0;
647
648		/* Linear histogram */
649		for (ba = hrange; ba != NULL; ba = SLIST_NEXT(ba, ba_next)) {
650			if (++count > 3)
651				yyerror("too many arguments");
652			if (ba->ba_type != B_AT_LONG)
653				yyerror("type invalid");
654
655			switch (count) {
656			case 1:
657				min = (long)ba->ba_value;
658				if (min >= 0)
659					break;
660				yyerror("negative minium");
661			case 2:
662				max = (long)ba->ba_value;
663				if (max > min)
664					break;
665				yyerror("maximum smaller than minium (%d < %d)",
666				    max,  min);
667			case 3:
668				break;
669			default:
670				assert(0);
671			}
672		}
673		if (count < 3)
674			yyerror("%d missing arguments", 3 - count);
675	}
676
677	ba = ba_new(bg_get(hname), B_AT_HIST);
678	ba->ba_key = hrange;
679	return bs_new(B_AC_BUCKETIZE, ba, (struct bt_var *)hval);
680}
681
682struct keyword {
683	const char	*word;
684	int		 token;
685	int		 type;
686};
687
688int
689kw_cmp(const void *str, const void *xkw)
690{
691	return (strcmp(str, ((const struct keyword *)xkw)->word));
692}
693
694struct keyword *
695lookup(char *s)
696{
697	static const struct keyword kws[] = {
698		{ "BEGIN",	BEGIN,		B_PT_BEGIN },
699		{ "END",	END,		B_PT_END },
700		{ "arg0",	BUILTIN,	B_AT_BI_ARG0 },
701		{ "arg1",	BUILTIN,	B_AT_BI_ARG1 },
702		{ "arg2",	BUILTIN,	B_AT_BI_ARG2 },
703		{ "arg3",	BUILTIN,	B_AT_BI_ARG3 },
704		{ "arg4",	BUILTIN,	B_AT_BI_ARG4 },
705		{ "arg5",	BUILTIN,	B_AT_BI_ARG5 },
706		{ "arg6",	BUILTIN,	B_AT_BI_ARG6 },
707		{ "arg7",	BUILTIN,	B_AT_BI_ARG7 },
708		{ "arg8",	BUILTIN,	B_AT_BI_ARG8 },
709		{ "arg9",	BUILTIN,	B_AT_BI_ARG9 },
710		{ "clear",	MFUNC,		B_AC_CLEAR },
711		{ "comm",	BUILTIN,	B_AT_BI_COMM },
712		{ "count",	MOP0, 		B_AT_MF_COUNT },
713		{ "cpu",	BUILTIN,	B_AT_BI_CPU },
714		{ "delete",	F_DELETE,	B_AC_DELETE },
715		{ "exit",	FUNC0,		B_AC_EXIT },
716		{ "hist",	OP1,		0 },
717		{ "hz",		HZ,		0 },
718		{ "if",		IF,		0 },
719		{ "kstack",	BUILTIN,	B_AT_BI_KSTACK },
720		{ "lhist",	OP4,		0 },
721		{ "max",	MOP1,		B_AT_MF_MAX },
722		{ "min",	MOP1,		B_AT_MF_MIN },
723		{ "nsecs",	BUILTIN,	B_AT_BI_NSECS },
724		{ "pid",	BUILTIN,	B_AT_BI_PID },
725		{ "print",	F_PRINT,	B_AC_PRINT },
726		{ "printf",	FUNCN,		B_AC_PRINTF },
727		{ "retval",	BUILTIN,	B_AT_BI_RETVAL },
728		{ "sum",	MOP1,		B_AT_MF_SUM },
729		{ "tid",	BUILTIN,	B_AT_BI_TID },
730		{ "time",	FUNC1,		B_AC_TIME },
731		{ "ustack",	BUILTIN,	B_AT_BI_USTACK },
732		{ "zero",	MFUNC,		B_AC_ZERO },
733	};
734
735	return bsearch(s, kws, nitems(kws), sizeof(kws[0]), kw_cmp);
736}
737
738int
739peek(void)
740{
741	if (pbuf != NULL) {
742		if (pindex < plen)
743			return pbuf[pindex];
744	}
745	return EOF;
746}
747
748int
749lgetc(void)
750{
751	if (pbuf != NULL) {
752		if (pindex < plen) {
753			yylval.colno++;
754			return pbuf[pindex++];
755		}
756	}
757	return EOF;
758}
759
760void
761lungetc(void)
762{
763	if (pbuf != NULL && pindex > 0) {
764		yylval.colno--;
765		pindex--;
766	}
767}
768
769int
770yylex(void)
771{
772	unsigned char	 buf[1024];
773	unsigned char	*ebuf, *p, *str;
774	int		 c;
775
776	ebuf = buf + sizeof(buf);
777	p = buf;
778
779again:
780	/* skip whitespaces */
781	for (c = lgetc(); isspace(c); c = lgetc()) {
782		if (c == '\n') {
783			yylval.lineno++;
784			yylval.colno = 0;
785		}
786	}
787
788	/* skip single line comments and shell magic */
789	if ((c == '/' && peek() == '/') ||
790	    (yylval.lineno == 1 && yylval.colno == 1 && c == '#' &&
791	     peek() == '!')) {
792		for (c = lgetc(); c != EOF; c = lgetc()) {
793			if (c == '\n') {
794				yylval.lineno++;
795				yylval.colno = 0;
796				goto again;
797			}
798		}
799	}
800
801	/* skip multi line comments */
802	if (c == '/' && peek() == '*') {
803		int pc;
804
805		for (pc = 0, c = lgetc(); c != EOF; c = lgetc()) {
806			if (pc == '*' && c == '/')
807				goto again;
808			else if (c == '\n')
809				yylval.lineno++;
810			pc = c;
811		}
812	}
813
814	switch (c) {
815	case '!':
816	case '=':
817		if (peek() == '=') {
818			lgetc();
819			return (c == '=') ? OP_EQ : OP_NE;
820		}
821		return c;
822	case '<':
823		if (peek() == '=') {
824			lgetc();
825			return OP_LE;
826		}
827		return OP_LT;
828	case '>':
829		if (peek() == '=') {
830			lgetc();
831			return OP_GE;
832		}
833		return OP_GT;
834	case '&':
835		if (peek() == '&') {
836			lgetc();
837			return OP_LAND;
838		}
839		return c;
840	case '|':
841		if (peek() == '|') {
842			lgetc();
843			return OP_LOR;
844		}
845		return c;
846	case '/':
847		if (peek() == '{' || peek() == '/' || peek() == '\n') {
848			return ENDFILT;
849		}
850		/* FALLTHROUGH */
851	case ',':
852	case '(':
853	case ')':
854	case '{':
855	case '}':
856	case ':':
857	case ';':
858		return c;
859	case EOF:
860		return 0;
861	case '"':
862		/* parse C-like string */
863		while ((c = lgetc()) != EOF && c != '"') {
864			if (c == '\\') {
865				c = lgetc();
866				switch (c) {
867				case '\\':	c = '\\';	break;
868				case '\'':	c = '\'';	break;
869				case '"':	c = '"';	break;
870				case 'a':	c = '\a';	break;
871				case 'b':	c = '\b';	break;
872				case 'e':	c = 033;	break;
873				case 'f':	c = '\f';	break;
874				case 'n':	c = '\n';	break;
875				case 'r':	c = '\r';	break;
876				case 't':	c = '\t';	break;
877				case 'v':	c = '\v';	break;
878				default:
879					yyerror("'%c' unsuported escape", c);
880					return ERROR;
881				}
882			}
883			*p++ = c;
884			if (p == ebuf) {
885				yyerror("too long line");
886				return ERROR;
887			}
888		}
889		if (c == EOF) {
890			yyerror("\"%s\" invalid EOF", buf);
891			return ERROR;
892		}
893		*p++ = '\0';
894		if ((str = strdup(buf)) == NULL)
895			err(1, "%s", __func__);
896		yylval.v.string = str;
897		return CSTRING;
898	default:
899		break;
900	}
901
902#define allowed_to_end_number(x) \
903    (isspace(x) || x == ')' || x == '/' || x == '{' || x == ';' || x == ']' || x == ',')
904
905	/* parsing number */
906	if (isdigit(c)) {
907		do {
908			*p++ = c;
909			if (p == ebuf) {
910				yyerror("too long line");
911				return ERROR;
912			}
913		} while ((c = lgetc()) != EOF && isdigit(c));
914		lungetc();
915		if (c == EOF || allowed_to_end_number(c)) {
916			const char *errstr = NULL;
917
918			*p = '\0';
919			yylval.v.number = strtonum(buf, LONG_MIN, LONG_MAX,
920			    &errstr);
921			if (errstr) {
922				yyerror("invalid number '%s' (%s)", buf,
923				    errstr);
924				return ERROR;
925			}
926			return NUMBER;
927		} else {
928			while (p > buf + 1) {
929				--p;
930				lungetc();
931			}
932			c = *--p;
933		}
934	}
935
936#define allowed_in_string(x) (isalnum(c) || c == '_')
937
938	/* parsing next word */
939	if (allowed_in_string(c)) {
940		struct keyword *kwp;
941		do {
942			*p++ = c;
943			if (p == ebuf) {
944				yyerror("too long line");
945				return ERROR;
946			}
947		} while ((c = lgetc()) != EOF && (allowed_in_string(c)));
948		lungetc();
949		*p = '\0';
950		kwp = lookup(buf);
951		if (kwp == NULL) {
952			if ((yylval.v.string = strdup(buf)) == NULL)
953				err(1, "%s", __func__);
954			return STRING;
955		}
956		if (pflag) {
957			/*
958			 * Probe lexer backdoor, interpret the token as a string
959			 * rather than a keyword. Otherwise, reserved keywords
960			 * would conflict with syscall names. The exception to
961			 * this is 'hz', which hopefully will never be a
962			 * syscall.
963			 */
964			if (kwp->token != HZ) {
965				yylval.v.string = kwp->word;
966				return STRING;
967			}
968		}
969		yylval.v.i = kwp->type;
970		return kwp->token;
971	}
972
973	if (c == '\n') {
974		yylval.lineno++;
975		yylval.colno = 0;
976	}
977	if (c == EOF)
978		return 0;
979	return c;
980}
981
982void
983pprint_syntax_error(void)
984{
985	char line[BUFSIZ];
986	int c, indent = yylval.colno;
987	size_t i;
988
989	strlcpy(line, &pbuf[pindex - yylval.colno], sizeof(line));
990
991	for (i = 0; line[i] != '\0' && (c = line[i]) != '\n'; i++) {
992		if (c == '\t')
993			indent += (8 - 1);
994		fputc(c, stderr);
995	}
996
997	fprintf(stderr, "\n%*c\n", indent, '^');
998}
999
1000void
1001yyerror(const char *fmt, ...)
1002{
1003	const char *prefix;
1004	va_list	va;
1005
1006	prefix = (yylval.filename != NULL) ? yylval.filename : getprogname();
1007
1008	fprintf(stderr, "%s:%d:%d: ", prefix, yylval.lineno, yylval.colno);
1009	va_start(va, fmt);
1010	vfprintf(stderr, fmt, va);
1011	va_end(va);
1012	fprintf(stderr, ":\n");
1013
1014	pprint_syntax_error();
1015
1016	perrors++;
1017}
1018
1019int
1020btparse(const char *str, size_t len, const char *filename, int debug)
1021{
1022	if (debug > 0)
1023		yydebug = 1;
1024	pbuf = str;
1025	plen = len;
1026	pindex = 0;
1027	yylval.filename = filename;
1028	yylval.lineno = 1;
1029
1030	yyparse();
1031	if (perrors)
1032		return perrors;
1033
1034	assert(SLIST_EMPTY(&l_variables));
1035
1036	return 0;
1037}
1038