1/**
2 * \file
3 * \brief Code synthesizer for bfdmux filters
4 *
5 * This file provides functions to create byte code in Bfdmux Intermediate Language
6 * from filter strings. This byte code can be executed using the functions in vm.c
7 * to filter network packets.
8 */
9/*
10 * Copyright (c) 2009, 2010, ETH Zurich.
11 * All rights reserved.
12 *
13 * This file is distributed under the terms in the attached LICENSE file.
14 * If you do not find this file, copies can be found by writing to:
15 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
16 */
17
18#ifndef DOXYGEN
19// exclude system headers from documentation
20
21#include <stdio.h>
22#include <stdlib.h>
23//#include <strings.h>
24#include <string.h>
25#include <stdbool.h>
26//#include <errno.h>
27
28//int errno;
29
30#endif							// DOXYGEN
31
32#include <bfdmuxtools/codegen.h>
33#include <bfdmuxtools/filter.h>
34#include <bfdmuxtools/tools.h>
35#include "opdefs.h"
36
37/**
38 * \brief Searches for 'substr' in 'str', between 'start_pos' and 'end_pos' inclusive, and not inside brackets!
39 * @param str the string to be searched
40 * @param start_pos the index of the first character of the search interval
41 * @param end_pos the index of the last character of the search interval
42 * @param substr the substring to look for
43 * @return returns the position at which 'substr' has been found, -1 if not found, -2 on bracket error
44 */
45static inline int
46substrfind(char *str, int start_pos, int end_pos, char *substr)
47{
48	int             i,
49	                j;
50	int             open_braces = 0;
51	int             sub_len = strlen(substr);
52	int             i_end = end_pos - sub_len + 1;
53	for (i = start_pos; i <= i_end; i++) {
54		if ((str[i] == '(') || (str[i] == '[')) {
55			open_braces++;
56		} else if ((str[i] == ')') || (str[i] == ']')) {
57			open_braces--;
58			if (open_braces < 0)
59				return -2;
60		} else if (open_braces == 0) {	// try matching substr at this
61			// position
62			j = 0;
63			while ((str[i + j] == substr[j]) && (j < sub_len)) {
64				j++;
65			}
66			if (j == sub_len)
67				return i;
68		}
69	}
70	return -1;
71}
72
73/**
74 * \brief Checks if 'out' still has 'space_needed' empty bytes
75 * @param out the byte array
76 * @param out_len the number of bytes already in the array 'out'
77 * @param out_sz the length of the currently reserved space of 'out'
78 * @param space_needed the number of bytes that need to be appended to 'out'
79 * @return returns true if enough space or reservation of larger space succeeded, otherwise false
80 */
81static inline          bool
82ensure_enough_space(uint8_t ** out, int *out_len, int *out_sz,
83					int space_needed)
84{
85	int             need = *out_len + space_needed - *out_sz;
86	if (need > 0) {				// need realloc!
87		*out_sz =
88			*out_sz +
89			INCREMENTAL_ALLOC_SIZE * (need / INCREMENTAL_ALLOC_SIZE + 1);
90		if ((*out_sz) > MAX_FILTER_CODE_SIZE)
91			return false;
92		void           *new_out = realloc(*out, *out_sz);
93		if (new_out == NULL)
94			return false;
95		*out = new_out;
96	}
97	return true;
98}
99
100/**
101 * \brief Tries to find the lowest precedence operator in an interval of 'expr'
102 * @param expr the expression to search in
103 * @param from_pos the first character of the serach interval
104 * @param to_pos the last character of the search interval
105 * @param pos the position where the operator has been found, if any
106 * @return the index of the operator in the op_list or -1 if no operator found
107 */
108static int
109find_operator(char *expr, int from_pos, int to_pos, int *pos)
110{
111	int             i,
112	                oppos;
113	i = 0;
114	while (op_list[i].opcode != 0) {
115		oppos = substrfind(expr, from_pos, to_pos, op_list[i].opstr);
116		if (oppos >= 0) {
117			*pos = oppos;
118			return i;
119		}
120		i++;
121	}
122	return -1;
123}
124
125/**
126 * \brief Removes leading and trailing spaces, and brackets that sourround the expression
127 * @param expr the expression to work with
128 * @param from_pos the start of the inverval to consider; may be shifted to the right
129 * @param to_pos the end of the interval to consider; may be shifted left
130 */
131static void
132remove_spaces_and_braces(char *expr, int *from_pos, int *to_pos)
133{
134	int             i,
135	                cnt;
136
137	// Cut off spaces and braces around expr
138	while ((*from_pos) <= (*to_pos)) {
139		if (expr[*from_pos] == ' ') {
140			(*from_pos)++;
141		} else if (expr[*to_pos] == ' ') {
142			(*to_pos)--;
143		} else if ((expr[*from_pos] == '(') && (expr[*to_pos] == ')')) {
144			// check if this is a matching bracket pair, eg. (...), and
145			// not (...).(...)
146			i = (*from_pos) + 1;
147			cnt = 0;
148			while (i < (*to_pos)) {
149				if (expr[i] == '(') {
150					cnt++;
151				} else if (expr[i] == ')') {
152					cnt--;
153				}
154				if (cnt < 0) {
155					break;		// The bracket at 'from_pos' was closed.
156					// pair does not match!
157				}
158				i++;
159			}
160			// Remove brackets, if matching pair
161			if (cnt == 0) {
162				(*from_pos)++;
163				(*to_pos)--;
164			} else {
165				break;
166			}
167		} else if ((expr[*from_pos] == '[') && (expr[*to_pos] == ']')) {	// same
168																			//
169			//
170			// for
171			// square
172			// brackets
173			// check if this is a matching bracket pair, eg. (...), and
174			// not (...).(...)
175			i = (*from_pos) + 1;
176			cnt = 0;
177			while (i < (*to_pos)) {
178				if (expr[i] == '[') {
179					cnt++;
180				} else if (expr[i] == ']') {
181					cnt--;
182				}
183				if (cnt < 0) {
184					break;		// The bracket at 'from_pos' was closed.
185					// pair does not match!
186				}
187				i++;
188			}
189			// Remove brackets, if matching pair
190			if (cnt == 0) {
191				(*from_pos)++;
192				(*to_pos)--;
193			} else {
194				break;
195			}
196		} else {
197			break;
198		}
199	}
200}
201
202
203/**
204 * \brief Compiles a whole subexpression and appends the byte code to 'out'
205 * @param expr the expression to work with
206 * @param from_pos the first character of the subexpression to compile
207 * @param to_pos the last character of the subexpression to compile
208 * @param out a pointer to the array that holds the compiled byte code
209 * @param out_len the number of bytes already in the array
210 * @param out_sz the current size of the array (reallocation, if full)
211 * @return returns 0 on success, -1 on memory error, or an index of a character in the subexpression that failed to compile
212 */
213static int
214compile_subtree(char *expr, int from_pos, int to_pos, uint8_t ** out,
215				int *out_len, int *out_sz)
216{
217	int             pos,
218	                old_len,
219	                msb;
220	int             op,
221	                err_pos;
222
223	remove_spaces_and_braces(expr, &from_pos, &to_pos);
224
225	if (from_pos > to_pos)
226		return (from_pos + to_pos) / 2 + 1;
227
228	// Try to find an operator (op will be it's index in the array, or -1
229	op = find_operator(expr, from_pos, to_pos, &pos);
230	if (op >= 0) {
231		int             opstrlen = strlen(op_list[op].opstr);
232		// Enforce syntax: Don't ignore left part of the expression, if
233		// operator binds to the right, or vice versa!
234		if ((op_list[op].arity == 0x10) && (pos + opstrlen <= to_pos)) {
235			return pos + 1;
236		} else if ((op_list[op].arity == 0x01) && (pos > from_pos)) {
237			return pos + 1;
238		}
239		if (!ensure_enough_space
240			(out, out_len, out_sz, op_list[op].reserved_length))
241			return -1;
242		(*out)[*out_len] = op_list[op].opcode;
243		*out_len += op_list[op].reserved_length;
244		old_len = *out_len;		// save output length
245		if (op_list[op].arity & 0x10) {	// left side relevant for operator
246			if ((err_pos =
247				 compile_subtree(expr, from_pos, pos - 1, out, out_len,
248								 out_sz)) != 0)
249				return err_pos;
250		}
251		if (op_list[op].arity & 0x01) {	// right side relevant for
252			// operator
253			if ((err_pos =
254				 compile_subtree(expr, pos + opstrlen, to_pos, out,
255								 out_len, out_sz)) != 0)
256				return err_pos;
257		}
258
259		if ((op_list[op].opcode == OP_OR) || (op_list[op].opcode == OP_AND)) {	// special
260																				//
261			//
262			// treatment
263			// for
264			// logical
265			// or/and
266			// operations
267			// compute and store subtree size in empty 32bits after opcode
268			*((uint32_t *) & ((*out)[old_len - 4])) = *out_len - old_len;
269		}
270		return 0;
271	}
272	// If no operator found, try to interpret string as integer constant
273	//errno = 0;
274	char           *endptr = expr + from_pos;
275	//uint64_t val = (long long int)strtol(endptr + from_pos, &endptr, 10);
276    	uintmax_t        val = strtoumax(expr + from_pos, &endptr, 0);
277	//if ((errno == 0) && (endptr == expr + to_pos + 1)) {	// ensures
278	if ((val != -1) && (endptr == expr + to_pos + 1)) {	// ensures
279		// that all
280		// characters
281		// were valid!
282		msb = find_msb(val);	// Determine size of operand
283		if (msb <= 8) {
284			if (!ensure_enough_space(out, out_len, out_sz, 1 + 1))
285				return -1;
286			(*out)[*out_len] = OP_INT8;
287			(*out_len)++;
288			(*out)[*out_len] = (uint8_t) val;
289			(*out_len)++;
290		} else if (msb <= 16) {
291			if (!ensure_enough_space(out, out_len, out_sz, 1 + 2))
292				return -1;
293			(*out)[*out_len] = OP_INT16;
294			(*out_len)++;
295			*((uint16_t *) & ((*out)[*out_len])) = (uint16_t) val;
296			*out_len += 2;
297		} else if (msb <= 32) {
298			if (!ensure_enough_space(out, out_len, out_sz, 1 + 4))
299				return -1;
300			(*out)[*out_len] = OP_INT32;
301			(*out_len)++;
302			*((uint32_t *) & ((*out)[*out_len])) = (uint32_t) val;
303			*out_len += 4;
304		} else {
305			if (!ensure_enough_space(out, out_len, out_sz, 1 + 8))
306				return -1;
307			(*out)[*out_len] = OP_INT64;
308			(*out_len)++;
309			*((uint64_t *) & ((*out)[*out_len])) = val;
310			*out_len += 8;
311		}
312		return 0;
313	}
314	return (from_pos + to_pos) / 2 + 1 + 1;
315}
316
317/**
318 * \brief Compiles a filter expression in Bfdmux Filter Language into Bfdmux Intermediate Code
319 * @param expression the expression to compile
320 * @param[out] filter_code Points to the memory buffer that contains the compiled filter, or NULL, if an error occurred
321 * @param[out] filter_len Indicates the length of the compiled code, or contains the error position in the filter string on failure
322 */
323void
324compile_filter(char *expression, uint8_t ** filter_code, int32_t *filter_len)
325{
326	int             out_sz = INITIAL_ALLOC_SIZE;	// initially allocate
327	// 64 bytes for code
328	// array
329	int             out_len = 0;
330	int             err_pos = 0;
331	uint8_t        *output = malloc(sizeof(uint8_t) * out_sz);
332	if (output != NULL) {
333		// int len = strlen(expression);
334//		printf("Filter %s\n", expression);
335		if ((err_pos =
336			 compile_subtree(expression, 0, strlen(expression) - 1,
337							 &output, &out_len, &out_sz)) != 0) {
338			free(output);
339			output = NULL;
340			out_len = err_pos;	// return error position
341		} else {
342			void           *new_output =
343				(uint8_t *) realloc(output, out_len);
344			if (new_output) {
345				output = new_output;
346			}
347		}
348	} else {
349		printf("compile_filter: malloc failed\n");
350	}
351
352	*filter_code = output;
353	*filter_len = out_len;
354}
355