lex.h revision 280849
1/*
2 * Copyright (C) 2004, 2005, 2007, 2008  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2002  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id: lex.h,v 1.37 2008/05/30 23:47:01 tbox Exp $ */
19
20#ifndef ISC_LEX_H
21#define ISC_LEX_H 1
22
23/*****
24 ***** Module Info
25 *****/
26
27/*! \file isc/lex.h
28 * \brief The "lex" module provides a lightweight tokenizer.  It can operate
29 * on files or buffers, and can handle "include".  It is designed for
30 * parsing of DNS master files and the BIND configuration file, but
31 * should be general enough to tokenize other things, e.g. HTTP.
32 *
33 * \li MP:
34 *	No synchronization is provided.  Clients must ensure exclusive
35 *	access.
36 *
37 * \li Reliability:
38 *	No anticipated impact.
39 *
40 * \li Resources:
41 *	TBS
42 *
43 * \li Security:
44 *	No anticipated impact.
45 *
46 * \li Standards:
47 * 	None.
48 */
49
50/***
51 *** Imports
52 ***/
53
54#include <stdio.h>
55
56#include <isc/lang.h>
57#include <isc/region.h>
58#include <isc/types.h>
59
60ISC_LANG_BEGINDECLS
61
62/***
63 *** Options
64 ***/
65
66/*@{*/
67/*!
68 * Various options for isc_lex_gettoken().
69 */
70
71#define ISC_LEXOPT_EOL			0x01	/*%< Want end-of-line token. */
72#define ISC_LEXOPT_EOF			0x02	/*%< Want end-of-file token. */
73#define ISC_LEXOPT_INITIALWS		0x04	/*%< Want initial whitespace token. */
74#define ISC_LEXOPT_NUMBER		0x08	/*%< Recognize numbers. */
75#define ISC_LEXOPT_QSTRING		0x10	/*%< Recognize quoted strings (qstrings). */
76/*@}*/
77
78/*@{*/
79/*!
80 * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in
81 * the DNS master file format.  If this option is set, then the
82 * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when
83 * the paren count is > 0.  To use this option, '(' and ')' must be special
84 * characters.
85 */
86#define ISC_LEXOPT_DNSMULTILINE		0x20	/*%< Handle '(' and ')'. */
87#define ISC_LEXOPT_NOMORE		0x40	/*%< Want "no more" token. */
88
89#define ISC_LEXOPT_CNUMBER		0x80    /*%< Recognize octal and hex numbers. */
90#define ISC_LEXOPT_ESCAPE		0x100	/*%< Recognize escapes. */
91#define ISC_LEXOPT_QSTRINGMULTILINE	0x200	/*%< Allow multiline "" strings. */
92#define ISC_LEXOPT_OCTAL		0x400	/*%< Expect an octal number. */
93/*@}*/
94/*@{*/
95/*!
96 * Various commenting styles, which may be changed at any time with
97 * isc_lex_setcomments().
98 */
99
100#define ISC_LEXCOMMENT_C		0x01	/*%< C-style comments. */
101#define ISC_LEXCOMMENT_CPLUSPLUS	0x02	/*%< C++-style comments. */
102#define ISC_LEXCOMMENT_SHELL		0x04	/*%< Shell-style comments. */
103#define ISC_LEXCOMMENT_DNSMASTERFILE	0x08	/*%< DNS master file style comments. */
104/*@}*/
105
106/***
107 *** Types
108 ***/
109
110/*! Lex */
111
112typedef char isc_lexspecials_t[256];	/*%< Table of special characters (see isc_lex_setspecials()); presumably indexed by character value -- TODO confirm. */
113
114/* Tokens */
115
116typedef enum {
117	isc_tokentype_unknown = 0,
118	isc_tokentype_string = 1,
119	isc_tokentype_number = 2,	/*%< Number; see ISC_LEXOPT_NUMBER. */
120	isc_tokentype_qstring = 3,	/*%< Quoted string; see ISC_LEXOPT_QSTRING. */
121	isc_tokentype_eol = 4,		/*%< End of line; see ISC_LEXOPT_EOL. */
122	isc_tokentype_eof = 5,		/*%< End of file; see ISC_LEXOPT_EOF. */
123	isc_tokentype_initialws = 6,	/*%< Initial whitespace; see ISC_LEXOPT_INITIALWS. */
124	isc_tokentype_special = 7,	/*%< Special character; see isc_lex_setspecials(). */
125	isc_tokentype_nomore = 8	/*%< "No more" token; see ISC_LEXOPT_NOMORE. */
126} isc_tokentype_t;
127
128typedef union {
129	char				as_char;
130	unsigned long			as_ulong;
131	isc_region_t			as_region;
132	isc_textregion_t		as_textregion;
133	void *				as_pointer;
134} isc_tokenvalue_t;	/*%< Value of a token; which member is meaningful presumably depends on the token type -- TODO confirm. */
135
136typedef struct isc_token {
137	isc_tokentype_t			type;	/*%< Kind of token. */
138	isc_tokenvalue_t		value;	/*%< Value associated with the token. */
139} isc_token_t;
140
141/***
142 *** Functions
143 ***/
144
145isc_result_t
146isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp);
147/*%<
148 * Create a lexer.
149 *
150 * 'max_token' is a hint of the number of bytes in the largest token.
151 *
152 * Requires:
153 *\li	'lexp' is a valid pointer; '*lexp' is not yet a lexer (presumably NULL).
154 *
155 *\li	max_token > 0.
156 *
157 * Ensures:
158 *\li	On success, *lexp is attached to the newly created lexer.
159 *
160 * Returns:
161 *\li	#ISC_R_SUCCESS
162 *\li	#ISC_R_NOMEMORY
163 */
164
165void
166isc_lex_destroy(isc_lex_t **lexp);
167/*%<
168 * Destroy the lexer referenced by '*lexp'.
169 *
170 * Requires:
171 *\li	'*lexp' is a valid lexer.
172 *
173 * Ensures:
174 *\li	*lexp == NULL
175 */
176
177unsigned int
178isc_lex_getcomments(isc_lex_t *lex);
179/*%<
180 * Return the current lexer commenting styles.
181 *
182 * Requires:
183 *\li	'lex' is a valid lexer.
184 *
185 * Returns:
186 *\li	The commenting styles which are currently allowed.
187 */
188
189void
190isc_lex_setcomments(isc_lex_t *lex, unsigned int comments);
191/*%<
192 * Set allowed lexer commenting styles.
193 *
194 * Requires:
195 *\li	'lex' is a valid lexer.
196 *
197 *\li	'comments' is a combination of ISC_LEXCOMMENT_* values.
198 */
199
200void
201isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials);
202/*%<
203 * Put the lexer's current list of special characters into 'specials'.
204 *
205 * Requires:
206 *\li	'lex' is a valid lexer.
207 */
208
209void
210isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials);
211/*%<
212 * The characters in 'specials' are returned as tokens.  Along with
213 * whitespace, they delimit strings and numbers.
214 *
215 * Note:
216 *\li	Comment processing takes precedence over special character
217 *	recognition.
218 *
219 * Requires:
220 *\li	'lex' is a valid lexer.
221 */
222
223isc_result_t
224isc_lex_openfile(isc_lex_t *lex, const char *filename);
225/*%<
226 * Open 'filename' and make it the current input source for 'lex'.
227 *
228 * Requires:
229 *\li	'lex' is a valid lexer.
230 *
231 *\li	'filename' is a valid C string.
232 *
233 * Returns:
234 *\li	#ISC_R_SUCCESS
235 *\li	#ISC_R_NOMEMORY			Out of memory
236 *\li	#ISC_R_NOTFOUND			File not found
237 *\li	#ISC_R_NOPERM			No permission to open file
238 *\li	#ISC_R_FAILURE			Couldn't open file, not sure why
239 *\li	#ISC_R_UNEXPECTED
240 */
241
242isc_result_t
243isc_lex_openstream(isc_lex_t *lex, FILE *stream);
244/*%<
245 * Make 'stream' the current input source for 'lex'.
246 *
247 * Requires:
248 *\li	'lex' is a valid lexer.
249 *
250 *\li	'stream' is a valid C stdio stream (FILE *).
251 *
252 * Returns:
253 *\li	#ISC_R_SUCCESS
254 *\li	#ISC_R_NOMEMORY			Out of memory
255 */
256
257isc_result_t
258isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer);
259/*%<
260 * Make 'buffer' the current input source for 'lex'.
261 *
262 * Requires:
263 *\li	'lex' is a valid lexer.
264 *
265 *\li	'buffer' is a valid buffer (isc_buffer_t).
266 *
267 * Returns:
268 *\li	#ISC_R_SUCCESS
269 *\li	#ISC_R_NOMEMORY			Out of memory
270 */
271
272isc_result_t
273isc_lex_close(isc_lex_t *lex);
274/*%<
275 * Close the most recently opened input source (i.e. file, stream, or buffer).
276 *
277 * Returns:
278 *\li	#ISC_R_SUCCESS
279 *\li	#ISC_R_NOMORE			No more input sources
280 */
281
282isc_result_t
283isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp);
284/*%<
285 * Get the next token.
286 *
287 * Requires:
288 *\li	'lex' is a valid lexer.
289 *
290 *\li	'lex' has an input source.
291 *
292 *\li	'options' contains valid options (ISC_LEXOPT_* values).
293 *
294 *\li	'tokenp' is a valid pointer.
295 *
296 * Returns:
297 *\li	#ISC_R_SUCCESS
298 *\li	#ISC_R_UNEXPECTEDEND
299 *\li	#ISC_R_NOMEMORY
300 *
301 *	The following two results are returned only if their corresponding
302 *	lexer options are not set.
303 *
304 *\li	#ISC_R_EOF			End of input source
305 *\li	#ISC_R_NOMORE			No more input sources
306 */
307
308isc_result_t
309isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
310		       isc_tokentype_t expect, isc_boolean_t eol);
311/*%<
312 * Get the next token from a DNS master file type stream.  This is a
313 * convenience function that sets appropriate options and handles quoted
314 * strings and end of line correctly for master files.  It also ungets
315 * unexpected tokens.
316 *
317 * Requires:
318 *\li	'lex' is a valid lexer.
319 *
320 *\li	'token' is a valid pointer.
321 *
322 * Returns:
323 *
324 *\li	Any return code from isc_lex_gettoken().
325 */
326
327isc_result_t
328isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, isc_boolean_t eol);
329/*%<
330 * Get the next token, presumably an octal number (TODO confirm), from a DNS master
331 * file type stream.  This is a convenience function that sets appropriate options
332 * and handles end of line correctly for master files.  It also ungets unexpected tokens.
333 *
334 * Requires:
335 *\li	'lex' is a valid lexer.
336 *
337 *\li	'token' is a valid pointer.
338 *
339 * Returns:
340 *
341 *\li	Any return code from isc_lex_gettoken().
342 */
343
344void
345isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp);
346/*%<
347 * Unget the current token, 'tokenp'.
348 *
349 * Requires:
350 *\li	'lex' is a valid lexer.
351 *
352 *\li	'lex' has an input source.
353 *
354 *\li	'tokenp' points to a valid token.
355 *
356 *\li	There is no ungotten token already.
357 */
358
359void
360isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r);
361/*%<
362 * Sets '*r' to a region containing the text of the last token returned.
363 *
364 * Requires:
365 *\li	'lex' is a valid lexer.
366 *
367 *\li	'lex' has an input source.
368 *
369 *\li	'tokenp' points to a valid token.
370 *
371 *\li	A token has been gotten and not ungotten.
372 */
373
374char *
375isc_lex_getsourcename(isc_lex_t *lex);
376/*%<
377 * Return the input source name.
378 *
379 * Requires:
380 *\li	'lex' is a valid lexer.
381 *
382 * Returns:
383 *\li	Source name, or NULL if no current source.
384 *\li	Result is valid while the current input source exists.
385 */
386
387
388unsigned long
389isc_lex_getsourceline(isc_lex_t *lex);
390/*%<
391 * Return the input source line number.
392 *
393 * Requires:
394 *\li	'lex' is a valid lexer.
395 *
396 * Returns:
397 *\li	Current line number, or 0 if no current source.
398 */
399
400isc_result_t
401isc_lex_setsourcename(isc_lex_t *lex, const char *name);
402/*%<
403 * Assigns the new name 'name' to the input source.
404 *
405 * Requires:
406 *
407 *\li	'lex' is a valid lexer.
408 *
409 * Returns:
410 *\li	#ISC_R_SUCCESS
411 *\li	#ISC_R_NOMEMORY
412 *\li	#ISC_R_NOTFOUND - there are no sources.
413 */
414
415isc_boolean_t
416isc_lex_isfile(isc_lex_t *lex);
417/*%<
418 * Return whether the current input source is a file.
419 *
420 * Requires:
421 *\li	'lex' is a valid lexer.
422 *
423 * Returns:
424 *\li	#ISC_TRUE if the current input is a file,
425 *\li	#ISC_FALSE otherwise.
426 */
427
428
429ISC_LANG_ENDDECLS
430
431#endif /* ISC_LEX_H */
432