1/* BEGIN LICENSE BLOCK 2 * Version: CMPL 1.1 3 * 4 * The contents of this file are subject to the Cisco-style Mozilla Public 5 * License Version 1.1 (the "License"); you may not use this file except 6 * in compliance with the License. You may obtain a copy of the License 7 * at www.eclipse-clp.org/license. 8 * 9 * Software distributed under the License is distributed on an "AS IS" 10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 11 * the License for the specific language governing rights and limitations 12 * under the License. 13 * 14 * The Original Code is The ECLiPSe Constraint Logic Programming System. 15 * The Initial Developer of the Original Code is Cisco Systems, Inc. 16 * Portions created by the Initial Developer are 17 * Copyright (C) 1989-2006 Cisco Systems, Inc. All Rights Reserved. 18 * 19 * Contributor(s): ECRC GmbH 20 * 21 * END LICENSE BLOCK */ 22 23 24/* 25 * SEPIA INCLUDE FILE 26 * 27 * VERSION $Id: lex.h,v 1.8 2011/04/27 05:15:50 jschimpf Exp $ 28 */ 29 30/* 31 * IDENTIFICATION lex.h 32 * 33 * DESCRIPTION see below 34 * 35 */ 36 37/*************************************************************************** 38* 39* LEXICAL ANALIZER DEFINITIONS 40* ---------------------------- 41* 42* All the definitions used by the lexical analyser 43* are in this file. The parser should include this file. 44* 45* 46* AUTHOR: Jorge Bocca 47* Everything changed by Pierre. 48* 49**************************************************************************/ 50 51/* 52 * Character Classes 53 */ 54 55/* unused characters */ 56#define DL 0 57#define KI 0 58 59/* usual atoms characters */ 60#define UC ACH-4 /* upper case alhabetic */ 61#define UL ACH-3 /* underline */ 62#define LC ACH-2 /* lower case alphabetic */ 63#define N ACH-1 /* digit */ 64#define ACH 5 /* to test the four preceeding */ 65 66/* ignored separators */ 67#define BS ACH /* blank space */ 68#define NL ACH+1 /* EOL, newline */ 69 70/* quote characters */ 71#define AQ ACH+2 /* atom quote */ 72#define SQ ACH+3 /* string quote */ 73#define LQ ACH+4 /* codes-list quote */ 74#define CQ ACH+5 /* chars-list quote */ 75#define SL ACH+6 /* solo char */ 76#define DS ACH+7 /* special solo */ 77#define CM ACH+8 /* line comment */ 78 79#define RA ACH+9 /* radix quote */ 80#define AS ACH+10 /* ascii quote */ 81#define TS ACH+11 /* terminator symbol */ 82 83/* symbol characters */ 84#define SCH ACH+12 /* to test the five following */ 85 86#define ES SCH+1 /* escape char in strings, symbol */ 87#define CM1 SCH+2 /* comment external delimiter */ 88#define CM2 SCH+3 /* comment internal delimiter */ 89#define SY SCH+4 /* symbol character */ 90 91#define NBCH SCH+5 /* the number of types */ 92 93#define RE NBCH /* end of buffer */ 94 95#define Symbol(c) ((c) > SCH) 96#define Alphanum(c) ((c) > 0 && (c) < ACH) 97 98/* recognize an octal digit */ 99#define octal(C) (((C) >= '0') && ((C) <= '7')) 100 101 102/* TOKENS */ 103 104#define LexError(token) ((token) < 0) 105 106#define NO_TOKEN 0 107#define BLANK_SPACE 1 108#define EOI 2 109#define EOCL 3 110#define IDENTIFIER 4 111#define QIDENTIFIER 5 112#define CODES 6 113#define COMMA 7 114#define BAR 8 115#define SOLO 9 116#define NUMBER 10 117#define STRING 11 118#define REFERENCE 12 119#define UREFERENCE 13 120#define SPACE_SOLO 14 121#define SPACE_NUMBER 15 122#define CLOSING_SOLO 16 123#define CHARS 17 124#define NBTK 18 125 126 127/* 128 * USER DEFINABLE SYNTAX PARAMETERS 129 */ 130 131#define NEWLINE_IN_QUOTES 0x0001 /* allow newlines in quoted objects */ 132#define LIMIT_ARG_PRECEDENCE 0x0002 /* limit argument precedence to 999 */ 133#define NO_BLANKS 0x0004 /* don't allow blanks after functor */ 134#define BAR_IS_NO_ATOM 0x0008 /* | is not an atom (unless quoted) */ 135#define BLANKS_IN_NIL 0x0010 /* allow blanks inside [] and {} */ 136#define NO_ATTRIBUTES 0x0020 /* don't allow variable attributes */ 137#define DOLLAR_VAR 0x0040 /* special handling of '$VAR'(N) */ 138#define NESTED_COMMENTS 0x0080 /* guess what, nested comments */ 139#define BASED_BIGNUMS 0x0100 /* based numbers can be bignums */ 140#define DENSE_OUTPUT 0x0200 /* avoid spaces around ops */ 141#define NO_ARRAY_SUBSCRIPTS 0x0400 /* don't allow array-like syntax */ 142#define DOUBLED_QUOTE_IS_QUOTE 0x0800 /* doubled quote means quote */ 143#define ISO_ESCAPES 0x1000 /* fully ISO-compliant escape seqs */ 144#define ISO_BASE_PREFIX 0x2000 /* ISO-compliant number base prefix */ 145#define FLOATS_AS_BREALS 0x4000 /* parse floats as bounded reals */ 146#define NO_CURLY_ARGUMENTS 0x8000 /* don't allow f{...} for f with ... */ 147#define BLANK_AFTER_SIGN 0x10000 /* allow blank after sign */ 148#define VAR_FUNCTOR_IS_APPLY 0x20000 /* parse X{Args} as apply(X,[Args]) */ 149#define ATOM_SUBSCRIPTS 0x40000 /* allow subscripts after atoms */ 150#define GENERAL_SUBSCRIPTS 0x80000 /* allow subscripts almost anywhere */ 151#define CURLY_ARGS_AS_LIST 0x100000/* parse {}(a,b,c) as {}([a,b,c]) */ 152#define FLOAT_NEEDS_POINT 0x200000/* require . in float constants */ 153#define BAR_IS_SEMICOLON 0x400000/* map infix |/2 to ;/2 */ 154#define PLUS_IS_NO_SIGN 0x800000/* + is always functor, never sign */ 155#define ISO_RESTRICTIONS 0x1000000/* other ISO restrictions */ 156 157#define SYNTAX_FLAGS 25 /* number of flags above */ 158 159 160/* 161 * Token structure returned by the lexer 162 */ 163 164typedef struct { 165 int class; /* token class */ 166 167 /* 168 * Depending on the token class, we have either: 169 * - a valid tagged pword value (term) 170 * - or a char* (string) whose length is in term.val.nint 171 */ 172 pword term; /* token value */ 173 char * string; /* token string (if any) */ 174 175 source_pos_t pos; /* source position of token */ 176 177} token_desc; 178 179 180/* 181 * Functions exported by the lexer 182 */ 183 184Extern int lex_an ARGS((stream_id, syntax_desc*, token_desc *)); 185Extern int ec_need_quotes ARGS((dident, syntax_desc *)); 186Extern char *string_to_number ARGS((char *start, pword *result, stream_id nst, syntax_desc *sd)); 187 188