1/* BEGIN LICENSE BLOCK
2 * Version: CMPL 1.1
3 *
4 * The contents of this file are subject to the Cisco-style Mozilla Public
5 * License Version 1.1 (the "License"); you may not use this file except
6 * in compliance with the License.  You may obtain a copy of the License
7 * at www.eclipse-clp.org/license.
8 *
9 * Software distributed under the License is distributed on an "AS IS"
10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
11 * the License for the specific language governing rights and limitations
12 * under the License.
13 *
14 * The Original Code is  The ECLiPSe Constraint Logic Programming System.
15 * The Initial Developer of the Original Code is  Cisco Systems, Inc.
16 * Portions created by the Initial Developer are
17 * Copyright (C) 1989-2006 Cisco Systems, Inc.  All Rights Reserved.
18 *
19 * Contributor(s): ECRC GmbH
20 *
21 * END LICENSE BLOCK */
22
23
24/*
25 * SEPIA INCLUDE FILE
26 *
27 * VERSION	$Id: lex.h,v 1.8 2011/04/27 05:15:50 jschimpf Exp $
28 */
29
30/*
31 * IDENTIFICATION		lex.h
32 *
33 * DESCRIPTION		see below
34 *
35 */
36
/***************************************************************************
*
*	LEXICAL ANALYSER DEFINITIONS
*	----------------------------
*
*	All the definitions used by the lexical analyser
*	are in this file.  The parser should include this file.
*
*
*	AUTHOR:  Jorge Bocca
*	Everything changed by Pierre.
*
**************************************************************************/
50
/*
 * Character Classes
 *
 * Small integer codes naming the lexical class of a character.  The codes
 * are laid out in contiguous ranges around the two anchors ACH and SCH so
 * that Alphanum() and Symbol() can classify a code by range comparison.
 *
 * Every derived code is fully parenthesised: a class macro can therefore
 * be used inside any larger expression (2 * NL, 10 - UC, -SY, ...) without
 * operator precedence tearing its expansion apart.
 */

/* unused characters (code 0 is rejected by both Alphanum() and Symbol()) */
#define DL      0
#define KI      0

/* usual atom characters: codes 1 .. ACH-1 */
#define UC      (ACH - 4)       /* upper case alphabetic */
#define UL      (ACH - 3)       /* underline */
#define LC      (ACH - 2)       /* lower case alphabetic */
#define N       (ACH - 1)       /* digit */
#define ACH     5               /* anchor: tests the four preceding classes */

/* ignored separators */
#define BS      ACH             /* blank space */
#define NL      (ACH + 1)       /* EOL, newline */

/* quote characters */
#define AQ      (ACH + 2)       /* atom quote */
#define SQ      (ACH + 3)       /* string quote */
#define LQ      (ACH + 4)       /* codes-list quote */
#define CQ      (ACH + 5)       /* chars-list quote */
#define SL      (ACH + 6)       /* solo char */
#define DS      (ACH + 7)       /* special solo */
#define CM      (ACH + 8)       /* line comment */

#define RA      (ACH + 9)       /* radix quote */
#define AS      (ACH + 10)      /* ascii quote */
#define TS      (ACH + 11)      /* terminator symbol */

/* symbol characters */
#define SCH     (ACH + 12)      /* anchor: Symbol() accepts every code above it */

#define ES      (SCH + 1)       /* escape char in strings, symbol */
#define CM1     (SCH + 2)       /* comment external delimiter */
#define CM2     (SCH + 3)       /* comment internal delimiter */
#define SY      (SCH + 4)       /* symbol character */

#define NBCH    (SCH + 5)       /* the number of types */

#define RE      NBCH            /* end of buffer */

/* True iff class code c lies strictly above SCH (this includes RE). */
#define Symbol(c)       ((c) > SCH)

/*
 * True iff class code c is one of UC, UL, LC, N, i.e. 0 < c < ACH.
 * The argument is evaluated exactly once: subtracting 1 in unsigned
 * arithmetic wraps 0 and every negative code to a huge value, so a single
 * comparison covers both bounds.
 */
#define Alphanum(c)     (((unsigned)(c) - 1u) < (unsigned)(ACH - 1))
97
/*
 * Recognize an octal digit: non-zero iff C is one of '0' .. '7'.
 *
 * C is evaluated exactly once, so an argument with side effects, such as
 * octal(*p++), is safe.  The subtraction is done in unsigned arithmetic:
 * any value below '0' wraps to a huge number, which lets one comparison
 * reject both sides of the range.
 */
#define		octal(C)	(((unsigned)(C) - (unsigned)'0') < 8u)
100
101
/*
 * TOKENS
 *
 * Token class codes, numbered consecutively from 0.  NBTK is one more
 * than the highest class code, i.e. the number of token classes.
 * Negative values are not token classes: LexError() treats them as
 * lexer errors.
 */

#define NO_TOKEN        0
#define BLANK_SPACE     1
#define EOI             2
#define EOCL            3
#define IDENTIFIER      4
#define QIDENTIFIER     5
#define CODES           6
#define COMMA           7
#define BAR             8
#define SOLO            9
#define NUMBER          10
#define STRING          11
#define REFERENCE       12
#define UREFERENCE      13
#define SPACE_SOLO      14
#define SPACE_NUMBER    15
#define CLOSING_SOLO    16
#define CHARS           17

#define NBTK            18

/* True iff the given token value is negative, i.e. signals an error. */
#define LexError(token) ((token) < 0)
125
126
/*
 *	USER DEFINABLE SYNTAX PARAMETERS
 *
 *	One bit per option, occupying bits 0 .. 24 without gaps, so the
 *	options can be combined into a single mask with bitwise OR.
 */

#define NEWLINE_IN_QUOTES       0x0000001   /* allow newlines in quoted objects  */
#define LIMIT_ARG_PRECEDENCE    0x0000002   /* limit argument precedence to 999  */
#define NO_BLANKS               0x0000004   /* don't allow blanks after functor  */
#define BAR_IS_NO_ATOM          0x0000008   /* | is not an atom (unless quoted)  */
#define BLANKS_IN_NIL           0x0000010   /* allow blanks inside [] and {}     */
#define NO_ATTRIBUTES           0x0000020   /* don't allow variable attributes   */
#define DOLLAR_VAR              0x0000040   /* special handling of '$VAR'(N)     */
#define NESTED_COMMENTS         0x0000080   /* nested comments                   */
#define BASED_BIGNUMS           0x0000100   /* based numbers can be bignums      */
#define DENSE_OUTPUT            0x0000200   /* avoid spaces around ops           */
#define NO_ARRAY_SUBSCRIPTS     0x0000400   /* don't allow array-like syntax     */
#define DOUBLED_QUOTE_IS_QUOTE  0x0000800   /* doubled quote means quote         */
#define ISO_ESCAPES             0x0001000   /* fully ISO-compliant escape seqs   */
#define ISO_BASE_PREFIX         0x0002000   /* ISO-compliant number base prefix  */
#define FLOATS_AS_BREALS        0x0004000   /* parse floats as bounded reals     */
#define NO_CURLY_ARGUMENTS      0x0008000   /* don't allow f{...} for f with ... */
#define BLANK_AFTER_SIGN        0x0010000   /* allow blank after sign            */
#define VAR_FUNCTOR_IS_APPLY    0x0020000   /* parse X{Args} as apply(X,[Args])  */
#define ATOM_SUBSCRIPTS         0x0040000   /* allow subscripts after atoms      */
#define GENERAL_SUBSCRIPTS      0x0080000   /* allow subscripts almost anywhere  */
#define CURLY_ARGS_AS_LIST      0x0100000   /* parse {}(a,b,c) as {}([a,b,c])    */
#define FLOAT_NEEDS_POINT       0x0200000   /* require . in float constants      */
#define BAR_IS_SEMICOLON        0x0400000   /* map infix |/2 to ;/2              */
#define PLUS_IS_NO_SIGN         0x0800000   /* + is always functor, never sign   */
#define ISO_RESTRICTIONS        0x1000000   /* other ISO restrictions            */

/* number of flags above; keep in step when a flag is added */
#define SYNTAX_FLAGS            25
158
159
160/*
161 * Token structure returned by the lexer
162 */
163
164typedef struct {
165	int	class;		/* token class			*/
166
167	/*
168	 * Depending on the token class, we have either:
169	 * - a valid tagged pword value (term)
170	 * - or a char* (string) whose length is in term.val.nint
171	 */
172	pword	term;		/* token value			*/
173	char *	string;		/* token string (if any)	*/
174
175	source_pos_t pos;	/* source position of token	*/
176
177} token_desc;
178
179
180/*
181 * Functions exported by the lexer
182 */
183
184Extern int lex_an ARGS((stream_id, syntax_desc*, token_desc *));
185Extern int ec_need_quotes ARGS((dident, syntax_desc *));
186Extern char *string_to_number ARGS((char *start, pword *result, stream_id nst, syntax_desc *sd));
187
188