1/* BEGIN LICENSE BLOCK
2 * Version: CMPL 1.1
3 *
4 * The contents of this file are subject to the Cisco-style Mozilla Public
5 * License Version 1.1 (the "License"); you may not use this file except
6 * in compliance with the License.  You may obtain a copy of the License
7 * at www.eclipse-clp.org/license.
8 *
9 * Software distributed under the License is distributed on an "AS IS"
10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
11 * the License for the specific language governing rights and limitations
12 * under the License.
13 *
14 * The Original Code is  The ECLiPSe Constraint Logic Programming System.
15 * The Initial Developer of the Original Code is  Cisco Systems, Inc.
16 * Portions created by the Initial Developer are
17 * Copyright (C) 1989-2006 Cisco Systems, Inc.  All Rights Reserved.
18 *
19 * Contributor(s):
20 *
21 * END LICENSE BLOCK */
22
23/*
24 * VERSION	$Id: lex.c,v 1.17 2015/01/14 01:31:09 jschimpf Exp $
25 */
26
27/*
28 * IDENTIFICATION		lex.c
29 *
30 *
31 *
32 * AUTHOR	VERSION	 DATE	REASON
33 * Jorge Bocca
34 * Pierre Dufresne
35 * Micha Meier
36 */
37
38/*
39 * INCLUDES:
40 */
41#include <math.h>
42
43#include	"config.h"
44#include	"sepia.h"	/* to be able to have built-ins */
45#include	"types.h"	/* to have the standard types (for BIP) */
46#include	"embed.h"
47#include	"mem.h"		/* to use in_dict and DidName */
48#include	"error.h"	/* the BIP return values and standard errors */
49#include	"dict.h"
50#include	"lex.h"		/* the values returned lex_an */
51#include	"ec_io.h"
52#include	"emu_export.h"
53#include 	"module.h"
54#include	"property.h"	/* for MODULE_PROP */
55#include	"os_support.h"
56#include	"rounding_control.h"
57
58#ifdef HAVE_STRING_H
59#include <string.h>
60#else
61extern char		*strcpy();
62#endif
63
64#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67
68#ifdef STDC_HEADERS
69#include <stdlib.h>
70#else
71extern double atof();
72#endif
73
74/*
75 * DEFINES:
76 */
77
78/* The maximum numeric value of a character constant in a string.
79 * This should eventually be changed to depend on the stream encoding,
80 * e.g. 8-bit:255, utf8:2147483647
81 */
82#define MAX_CHAR_CODE	255
83
84/*
85 * FUNCTOR_COMPLETION causes trouble in connection with testing for
86 * the .eco header in procedure.c. Also, it does not work on Windows.
87 */
88#undef FUNCTOR_COMPLETION
89
90#ifdef FUNCTOR_COMPLETION
91#if defined(HAVE_READLINE)
92static char	**_complete_predicate(char *text, int start, int end);
93static char	*_find_matching_predicate(char *string, int state);
94#define Find_Matching_Atom(end, nst, pw, stop)
95#else
96static void	_find_matching_atom(unsigned char *end, stream_id nst, unsigned char **pw, unsigned char **stop);
97#define Find_Matching_Atom(end, nst, pw, stop)	_find_matching_atom(end, nst, &pw, &stop)
98#endif
99#endif
100
101#define Extend_Lex_Aux(nst, pw, stop)				\
102		pw = _extend_lex_aux(nst);			\
103		stop = StreamLexAux(nst) + StreamLexSize(nst);
104
105/*
106 * STATIC VARIABLE DEFINITIONS:
107 */
108static int 	p_set_chtab(value v1, type t1, value v2, type t2, value vm, type tm),
109		p_get_chtab(value v1, type t1, value v2, type t2, value vm, type tm),
110		p_get_syntax(value val1, type tag1, value val2, type tag2, value vm, type tm),
111		p_set_syntax(value val1, type tag1, value val2, type tag2, value vm, type tm),
112		p_copy_syntax(value vfrom, type tfrom, value vto, type tto),
113		p_read_token_(value vs, type ts, value v, type t, value vc, type tc, value vm, type tm);
114
115static unsigned char	*_extend_lex_aux(stream_id nst);
116static int	_skip_blanks(stream_id nst, syntax_desc *sd, unsigned char **p_pligne, int *p_cc, int *p_ctype);
117
118static dident	chname_[NBCH + 1];
119static dident	tname_[NBTK + 1];
120static dident	d_comma0_;
121static int	completion_idx,
122		completion_length,
123		completion_start;
124static dident	completion_dip;
125
126static syntax_desc	default_syntax_desc = {
127/* Here is the initial type distribution: */
128{
129/* nul soh stx etx eot enq ack bel  bs  ht  nl  vt  np  cr  so  si */
130    BS, BS, BS, BS, BS, BS, BS, BS, DL, BS, NL, BS, BS, BS, BS, BS,
131/* dle dc1 dc2 dc3 dc4 nak syn etb can  em sub esc  fs  gs  rs  us */
132    BS, KI, BS, BS, BS, KI, BS, BS, KI, BS, BS, BS, BS, BS, BS, BS,
133/*  sp   !   "   #   $   %   &   '   (   )   *    +   ,   -   .   / */
134    BS, SL, SQ, SY, SY, CM, SY, AQ, DS, DS, CM2, SY, DS, SY, SY, CM1,
135/*   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ? */
136     N,  N,  N,  N,  N,  N,  N,  N,  N,  N, SY, SL, SY, SY, SY, SY,
137/*   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O */
138    SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
139/*   P   Q   R   S   T   U   V   W   X   Y   Z   [   \  ]   ^   _ */
140    UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, DS, ES, DS, SY, UL,
141/*   `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o */
142    SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
143/*   p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~ del */
144    LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, DS, DS, DS, SY, DL,
145/*  80	                       Latin-1                          8f */
146    BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
147/*  90	                                                        9f */
148    BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
149/*  a0	                                                        af */
150    BS, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY,
151/*  b0	                                                        bf */
152    SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY,
153/*  c0	                                                        cf */
154    UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
155/*  d0	                                                        df */
156    UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC,
157/*  e0	                                                        ef */
158    LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
159/*  f0	                                                        ff */
160    LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC,
161/*  EOI symbol */
162    RE},
163    NEWLINE_IN_QUOTES|NO_BLANKS,	/* options */
164    '"',				/* sq	   */
165    '\'',				/* aq	   */
166    '_',				/* ul	   */
167    '\\'				/* escape  */
168};
169
170
171static dident syntax_flags[SYNTAX_FLAGS]; /* the syntax flag names (dids) */
172
173/*
174 * EXTERNAL VARIABLE DECLARATIONS:
175 */
176
177/*
178 * EXTERNAL VARIABLE DEFINITIONS:
179 */
180
181syntax_desc	*default_syntax = &default_syntax_desc;
182
183/*
184 * FUNCTION DEFINITIONS:
185 */
186
187void
188lex_init(int flags)	/* initialization: setting the name of types */
189{
190    /*
191     * syntax_flags, chname_ and tname_ are read-only data
192     * and are replicated in every process
193     */
194
195    /* the array must correspond to the flag order in syntax */
196    syntax_flags[0] =	in_dict("nl_in_quotes",0);
197    syntax_flags[1] =	in_dict("limit_arg_precedence",0);
198    syntax_flags[2] =	in_dict("no_blanks",0);
199    syntax_flags[3] =	in_dict("bar_is_no_atom",0);
200    syntax_flags[4] =	in_dict("blanks_in_nil",0);	/* obsolete, no effect */
201    syntax_flags[5] =	in_dict("no_attributes",0);
202    syntax_flags[6] =	in_dict("$VAR",0);		/* obsolete */
203    syntax_flags[7] =	in_dict("nested_comments",0);
204    syntax_flags[8] =	in_dict("based_bignums",0);
205    syntax_flags[9] =	in_dict("dense_output",0);	/* obsolete */
206    syntax_flags[10] =	in_dict("no_array_subscripts",0);
207    syntax_flags[11] =	in_dict("doubled_quote_is_quote",0);
208    syntax_flags[12] =	in_dict("iso_escapes",0);
209    syntax_flags[13] =	in_dict("iso_base_prefix",0);
210    syntax_flags[14] =	in_dict("read_floats_as_breals",0);
211    syntax_flags[15] =	in_dict("no_curly_arguments",0);
212    syntax_flags[16] =	in_dict("blanks_after_sign",0);
213    syntax_flags[17] =	in_dict("var_functor_is_apply",0);
214    syntax_flags[18] =	in_dict("atom_subscripts",0);
215    syntax_flags[19] =	in_dict("general_subscripts",0);
216    syntax_flags[20] =	in_dict("curly_args_as_list",0);
217    syntax_flags[21] =	in_dict("float_needs_point",0);
218    syntax_flags[22] =	in_dict("bar_is_semicolon",0);
219    syntax_flags[23] =	in_dict("plus_is_no_sign",0);
220    syntax_flags[24] =	in_dict("iso_restrictions",0);
221
222    default_syntax_desc.char_class[EOB_MARK] = RE;
223
224    chname_[0] = in_dict("unused",0);
225    chname_[UC] = in_dict("upper_case",0);
226    chname_[UL] = in_dict("underline",0);
227    chname_[LC] = in_dict("lower_case",0);
228    chname_[N] = in_dict("digit",0);
229    chname_[BS] = in_dict("blank_space",0);
230    chname_[NL] = in_dict("end_of_line",0);
231    chname_[AQ] = in_dict("atom_quote",0);
232    chname_[SQ] = in_dict("string_quote",0);
233    chname_[SL] = in_dict("solo",0);
234    chname_[DS] = in_dict("special",0);
235    chname_[CM] = in_dict("line_comment",0);
236    chname_[LQ] = in_dict("list_quote",0);      /* should be codes_quote */
237    chname_[CQ] = in_dict("chars_quote",0);
238    chname_[RA] = in_dict("radix",0);
239    chname_[AS] = in_dict("ascii",0);
240    chname_[TS] = in_dict("terminator",0);
241    chname_[ES] = in_dict("escape",0);
242    chname_[CM1] = in_dict("first_comment",0);
243    chname_[CM2] = in_dict("second_comment",0);
244    chname_[SY] = in_dict("symbol",0);
245    chname_[NBCH] = in_dict("null",0);
246
247    tname_[NO_TOKEN] =	d_.err;
248    tname_[BLANK_SPACE] = d_.err;
249    tname_[EOI] =	d_.eof;
250    tname_[EOCL] =	in_dict("fullstop", 0);
251    tname_[IDENTIFIER] = d_.atom0;
252    tname_[QIDENTIFIER] = in_dict("quoted_atom", 0);
253    tname_[COMMA] =	in_dict("comma", 0);
254    tname_[BAR] =
255    tname_[CLOSING_SOLO] =
256    tname_[SOLO] =	in_dict("solo", 0);
257    tname_[SPACE_NUMBER] =
258    tname_[NUMBER] =	in_dict("number", 0);
259    tname_[STRING] =	d_.string0;
260    tname_[REFERENCE] =	d_.var0;
261    tname_[UREFERENCE] = in_dict("anonymous", 0);
262    tname_[CODES] =	in_dict("codes", 0);
263    tname_[CHARS] =	in_dict("chars", 0);
264    tname_[SPACE_SOLO] = in_dict("open_par", 0);
265
266    d_comma0_ = in_dict(",", 0);
267#if defined(FUNCTOR_COMPLETION) && defined(HAVE_READLINE)
268    {
269	extern char ** (*rl_attempted_completion_function)();
270
271	rl_attempted_completion_function = _complete_predicate;
272    }
273#endif
274
275    if (flags & INIT_SHARED)
276    {
277	(void) exported_built_in(in_dict("set_chtab_", 3), p_set_chtab, B_SAFE);
278	(void) exported_built_in(in_dict("get_chtab_", 3), p_get_chtab, B_UNSAFE|U_SIMPLE);
279	(void) local_built_in(in_dict("set_syntax_", 3), p_set_syntax, B_SAFE);
280	(void) b_built_in(in_dict("get_syntax_", 3), p_get_syntax, d_.kernel_sepia);
281	exported_built_in(in_dict("read_token_", 4),	p_read_token_, B_UNSAFE|U_GROUND) -> mode = BoundArg(2, CONSTANT) | BoundArg(3, CONSTANT);
282	(void) local_built_in(in_dict("copy_syntax", 2), p_copy_syntax, B_SAFE);
283    }
284}
285
286#define Set_TokenString(s, l) \
287	token->term.val.nint = (word)(l); \
288	token->string = (char *) (s);
289
290
291/* up to three characters of backup are needed, e.g. in "3e+a" */
292/* at eoi we don't advance pligne, so don't backup */
293#define Backup_(c,n) \
294	pligne -= (n) - ((c)==EOI_SYMBOL? 1: 0);
295
296#define EoB(nst) (StreamBuf(nst) + StreamCnt(nst))
297
298#define Get_Chr(c) \
299	c = *pligne++; \
300	if ((c) == EOB_MARK  &&  pligne > EoB(nst)) { \
301	    StreamPtr(nst) = pligne-1; \
302	    if (fill_buffer(nst) != PSUCCEED) { \
303		pligne = StreamPtr(nst); \
304	    	c = EOI_SYMBOL; \
305	    } else { \
306		pligne = StreamPtr(nst); \
307		c = *pligne++; \
308	    } \
309	}
310
311#define Get_Ch_Class(c,t) \
312	Get_Chr(c) \
313	t = sd->char_class[c];
314
315#ifdef FUNCTOR_COMPLETION
316#define Get_Ch_Class_And_Complete(c,t) \
317	c = *pligne++; \
318	if ((c) == EOB_MARK  &&  pligne > EoB(nst)) { \
319	    if (IsTty(nst) && StreamCnt(nst) != StreamSize(nst) && \
320		*(pw - 1) != '\n') { \
321		Find_Matching_Atom(pw, nst, pw, stop); \
322	    } \
323	    StreamPtr(nst) = pligne-1; \
324	    if (fill_buffer(nst) != PSUCCEED) { \
325		pligne = StreamPtr(nst); \
326	    	c = EOI_SYMBOL; \
327	    } else { \
328		pligne = StreamPtr(nst); \
329		c = *pligne++; \
330	    } \
331	} \
332	t = sd->char_class[c];
333#else
334#define Get_Ch_Class_And_Complete(c,t) \
335	Get_Ch_Class(c,t)
336#endif
337
338
339/*
340 * Compute the file position of the next/last character read
341 */
342#define CurrentOffset(nst,pligne) \
343	(StreamOffset(nst) + (pligne - StreamBuf(nst)))
344
345#define PreviousOffset(nst,pligne,cc) \
346	(CurrentOffset(nst,pligne) - BytesPerChar(cc))
347
348#define BytesPerChar(cc) 1
349
350
351int
352lex_an(	stream_id nst,		/* in: stream to read from */
353	syntax_desc *sd,	/* in: syntax descriptor */
354	token_desc *token	/* out: token descriptor */
355    )				/* returns: token or (negative) error code */
356{
357    unsigned char	*pligne = StreamPtr(nst);
358    unsigned char 	*pw, *stop;
359    int			tok = NO_TOKEN;
360    int			cc;
361    int			quote_char;
362    int			ctype;
363
364    token->string = (char *) 0;
365    token->pos.file = StreamName(nst);
366    token->pos.line = StreamLine(nst);
367    token->pos.from = CurrentOffset(nst,pligne);
368
369    if (!pligne) {
370    	tok = EOI;
371	goto _return_tok_;
372    }
373
374    Get_Ch_Class(cc, ctype);		/* read first character */
375
376_start_:		/* cc/ctype: current char, pligne: ptr to next char */
377    switch (ctype)
378    {
379    case RE:
380	tok = EOI;
381	break;
382
383    case BS:
384    case NL:
385    case CM:
386	tok = _skip_blanks(nst, sd, &pligne, &cc, &ctype);
387    	if (LexError(tok))
388	    break;
389	token->pos.line = StreamLine(nst);
390	token->pos.from = PreviousOffset(nst,pligne,cc);
391	goto _start_;		/* tok maybe BLANK_SPACE */
392
393    case CM1:
394	tok = _skip_blanks(nst, sd, &pligne, &cc, &ctype);
395    	if (LexError(tok))
396	    break;
397	else if (tok == BLANK_SPACE) {
398	    token->pos.line = StreamLine(nst);
399	    token->pos.from = PreviousOffset(nst,pligne,cc);
400	    goto _start_;	/* it was really a comment */
401	} else
402	    goto _symbol_;	/* was no comment, treat CM1 as symbol */
403
404    case AQ:
405	tok = QIDENTIFIER;
406	goto _quote_;
407    case SQ:
408	tok = STRING;
409	goto _quote_;
410    case CQ:
411	tok = CHARS;
412	goto _quote_;
413    case LQ:
414	tok = CODES;
415_quote_:
416	quote_char = cc;
417	pw = StreamLexAux(nst);
418	stop = pw + StreamLexSize(nst);
419	Get_Ch_Class(cc, ctype);
420	for(;;)
421	{
422	    int base, iresult, max_no, max_lc, max_uc;
423
424	    switch (ctype)
425	    {
426	    case RE:
427		Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst));
428		tok = ENDOFFILE;
429		goto _return_tok_;
430
431	    case AQ:
432	    case LQ:
433	    case SQ:
434	    case CQ:
435		if (cc != quote_char) {
436		    *pw++ = cc;
437		    break;		/* other quote within */
438		}
439		if (sd->options & DOUBLED_QUOTE_IS_QUOTE)
440		{
441		    Get_Chr(cc);
442		    if (cc == quote_char)
443		    {
444			*pw++ = cc;
445			break;
446		    }
447		    Backup_(cc, 1);
448		}
449		/* check for consecutive strings */
450		if ((ctype != AQ) && !(sd->options & ISO_RESTRICTIONS))
451		{
452		    Get_Ch_Class(cc,ctype);
453		    if (LexError(_skip_blanks(nst, sd, &pligne, &cc, &ctype))) {
454			Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst));
455			tok = ENDOFFILE;
456			goto _return_tok_;
457		    }
458		    if (cc == quote_char)
459		    	break;		/* skip doubled quote */
460		    Backup_(cc, 1);
461		}
462		*pw = 0;		/* end of quoted item */
463		Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst));
464		goto _return_tok_;
465
466	    case ES: 	/* escape character: interpret next */
467		Get_Ch_Class(cc,ctype);
468		switch (ctype)
469		{
470		case RE:
471		    Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst));
472		    tok = ENDOFFILE;
473		    goto _return_tok_;
474
475		case BS:	 /* ignore escaped line end \r\n */
476		    if (cc != '\r')
477			goto _return_ill_quoted_;
478		    Get_Ch_Class(cc, ctype);	/* next character */
479		    if (ctype != NL)
480			goto _return_ill_quoted_;
481		    /* fall through */
482
483		case NL:	/* ignore escaped line end \n */
484		    StreamLine(nst)++;
485		    /* don't call Get_Ch_Class_And_Complete() because
486		     * nothing was put in the lex_aux buffer */
487		    Get_Ch_Class(cc, ctype);	/* next character */
488		    continue;
489
490		case ES:
491		case AQ:
492		case LQ:
493		case CQ:
494		case SQ:
495		    *pw++ = cc;		/* just take cc as it is */
496		    break;
497
498		case LC:
499		    switch (cc)
500		    {
501		    case 'a': *pw++ = 0007; break;	/* alert */
502		    case 'b': *pw++ = '\b'; break;	/* backspace */
503		    case 't': *pw++ = '\t'; break;	/* tab */
504		    case 'n': *pw++ = '\n'; break;	/* newline */
505		    case 'v': *pw++ = 0013; break;	/* vertical tab */
506		    case 'f': *pw++ = '\f'; break;	/* form feed */
507		    case 'r': *pw++ = '\r'; break;	/* return */
508
509		    case 'e':				/* escape */
510			if (sd->options & ISO_ESCAPES) goto _return_ill_quoted_;
511			*pw++ = 0033; break;
512
513		    case 'd':				/* delete */
514			if (sd->options & ISO_ESCAPES) goto _return_ill_quoted_;
515			*pw++ = 0177; break;
516
517		    case 's':				/* space */
518			if (sd->options & ISO_ESCAPES) goto _return_ill_quoted_;
519			*pw++ = ' '; break;
520
521		    case 'c':		/* Quintus/Sicstus feature */
522			if (sd->options & ISO_ESCAPES) goto _return_ill_quoted_;
523			do {
524			    Get_Ch_Class(cc, ctype);
525			} while (ctype == BS || ctype == NL);
526			continue;
527
528		    case 'x':
529			base = 16; max_no = '9'; max_lc = 'f'; max_uc = 'F';
530			Get_Ch_Class(cc, ctype)
531			if (ctype == ES)
532			    goto _return_ill_quoted_;
533_iso_numeric_escape_:
534			for (iresult=0;;) {
535			    if (cc>='0' && cc<=max_no) cc -= '0';
536			    else if (cc>='a' && cc<=max_lc) cc = cc - 'a' + 10;
537			    else if (cc>='A' && cc<=max_uc) cc = cc - 'A' + 10;
538			    else if (ctype == ES)
539				break;
540			    else
541				goto _return_ill_quoted_;
542			    if ((unsigned )iresult <= MAX_CHAR_CODE/base &&
543				    ((unsigned ) (iresult * base) <= MAX_CHAR_CODE - cc))
544				iresult = iresult * base + cc;
545			    else goto _return_ill_quoted_;	/* overflow */
546			    Get_Ch_Class(cc, ctype)
547			}
548			*pw++ = iresult;
549			break;
550
551		    default: goto _return_ill_quoted_;
552		    }
553		    break;
554
555		case N:
556		    if (sd->options & ISO_ESCAPES)
557		    {
558			/* variable length, require terminating \ */
559			base = 8; max_no = '7'; max_lc = 0; max_uc = 0;
560		    	goto _iso_numeric_escape_;
561		    }
562		    switch (cc)
563		    {
564		    case '0':
565		    case '1':
566		    case '2':
567		    case '3':	/* check for 3 octal digits */
568			{
569			    int val = cc - '0';
570			    Get_Chr(cc);			/* second */
571			    if (!octal(cc)) {
572				goto _return_ill_quoted_;
573			    }
574			    val = (val << 3) + (cc - '0');
575			    Get_Chr(cc);			/* third */
576			    if (!octal(cc)) {
577				goto _return_ill_quoted_;
578			    }
579			    val = (val << 3) + (cc - '0');
580			    *pw++ = val;
581			    break;
582			}
583		    default: goto _return_ill_quoted_;
584		    }
585		    break;
586
587		default:
588		    goto _return_ill_quoted_;
589		}
590		break;
591
592	    case BS:
593		/* ISO does not allow tabs etc in quoted items */
594		if (cc != ' '  &&  (sd->options & ISO_ESCAPES))
595		    goto _return_ill_quoted_;
596		*pw++ = cc;
597		break;
598
599	    case NL:
600		StreamLine(nst)++;
601		if (!(sd->options & NEWLINE_IN_QUOTES))
602		    goto _return_ill_quoted_;
603		/* fall through */
604
605	    default:
606		*pw++ = cc;
607		break;
608	    }
609	    Get_Ch_Class_And_Complete(cc, ctype);	/* next character */
610	    if (pw == stop) {
611		Extend_Lex_Aux(nst, pw, stop);
612	    }
613	} /* end for(;;) */
614
615
616    case UC:	/* uppercase */
617    	tok = REFERENCE;
618	goto _name_;
619    case UL:	/* special prefix for variables */
620    	tok = UREFERENCE;
621	goto _name_;
622    case LC:	/* a lower case symbol */
623    	tok = IDENTIFIER;
624_name_:
625	pw = StreamLexAux(nst);
626	stop = pw + StreamLexSize(nst);
627	*pw++ = cc;
628	Get_Ch_Class(cc, ctype);
629	for(;;)
630	{
631	    if (Alphanum(ctype))
632	    {
633		if (pw == stop) {
634		    Extend_Lex_Aux(nst, pw, stop);
635		}
636		*pw++ = cc;
637		Get_Ch_Class_And_Complete(cc, ctype);
638	    } else {
639	    	break;
640	    }
641	}
642	Backup_(cc, 1);
643	*pw = 0;
644	Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst));
645	if (tok == UREFERENCE && (pw - StreamLexAux(nst)) > 1)
646	    tok = REFERENCE;
647	break;
648
649
650    case SY:
651    case ES:
652    case CM2:
653_symbol_:
654	pw = StreamLexAux(nst);
655	stop = pw + StreamLexSize(nst);
656	*pw++ = cc;
657	Get_Ch_Class(cc, ctype);
658	for(;;)
659	{
660	    if (Symbol(ctype) && ctype != RE)
661	    {
662		if (pw == stop) {
663		    Extend_Lex_Aux(nst, pw, stop);
664		}
665		*pw++ = cc;
666		Get_Ch_Class_And_Complete(cc, ctype);
667	    } else {
668	    	break;
669	    }
670	}
671#ifdef ISO_FULLSTOP
672	Backup_(cc, 1);
673	if ((pw - StreamLexAux(nst)) == 1  &&  *StreamLexAux(nst) == '.'
674	    && (ctype == BS || ctype == NL || ctype == RE || ctype == CM))
675	{
676	    Make_Atom(&token->term, d_.eocl);
677	    tok = EOCL;				/* full stop */
678	} else {
679	    *pw = 0;
680	    Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst));
681	    tok = IDENTIFIER;
682	}
683#else
684	if ((pw - StreamLexAux(nst)) == 1  &&  *StreamLexAux(nst) == '.'
685	    && (ctype == BS || ctype == NL || ctype == RE || ctype == CM))
686	{
687	    if (ctype == RE || ctype == CM)
688	    {
689		Backup_(cc, 1);
690	    }
691	    else if (ctype == NL)
692	    {
693		StreamLine(nst)++;
694	    }
695	    Make_Atom(&token->term, d_.eocl);
696	    tok = EOCL;				/* full stop */
697	} else {
698	    Backup_(cc, 1);
699	    *pw = 0;
700	    Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst));
701	    tok = IDENTIFIER;
702	}
703#endif
704	break;
705
706
707    case AS:				/* ascii-quote */
708	Get_Ch_Class(cc,ctype);
709	if (ctype ==  RE)
710	    goto _start_;
711	if (ctype == NL)
712	    StreamLine(nst)++;
713	Make_Integer(&token->term, cc);
714	tok = NUMBER;
715	break;
716
717
718    case N:
719	pligne = string_to_number((char *) pligne - 1, &token->term, nst, sd);
720	tok = token->term.tag.kernel == TEND ? BAD_NUMERIC_CONSTANT :
721		tok == BLANK_SPACE ? SPACE_NUMBER :
722		NUMBER;
723	goto _return_tok_;
724
725
726    case DS:
727	switch(cc)
728	{
729	case  '(':
730	case  '[':
731	case  '{':
732	    Make_Integer(&token->term, cc);
733	    tok = (tok == BLANK_SPACE) ? SPACE_SOLO : SOLO;
734	    break;
735
736	case  ',':
737	    Make_Atom(&token->term, d_comma0_);
738	    tok = COMMA;
739	    break;
740
741	case  '|':
742	    StreamLexAux(nst)[0] = cc;
743	    StreamLexAux(nst)[1] = 0;
744	    Set_TokenString(StreamLexAux(nst), 1);
745	    tok = BAR;
746	    break;
747
748	case  ')':
749	case  ']':
750	case  '}':
751	    Make_Integer(&token->term, cc);
752	    tok = CLOSING_SOLO;
753	    break;
754
755	default:
756	    Make_Integer(&token->term, cc);
757	    tok = SOLO;
758	    break;
759	}
760	break;
761
762
763    case TS:		/* terminator character (non-Prolog extension) */
764	Make_Atom(&token->term, d_.eocl);
765	tok = EOCL;				/* full stop */
766	break;
767
768    case SL:		/* like SY, but every character is its own token */
769	StreamLexAux(nst)[0] = cc;
770	StreamLexAux(nst)[1] = 0;
771	Set_TokenString(StreamLexAux(nst), 1);
772	tok = IDENTIFIER;
773	break;
774
775
776    default:
777	Make_Integer(&token->term, cc);
778	tok = SOLO;
779	break;
780
781    } /* end switch */
782
783_return_tok_:
784    StreamPtr(nst) = pligne;
785    token->pos.to = CurrentOffset(nst,pligne);
786    token->class = tok;
787    return tok;
788
789_return_ill_quoted_:
790    Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst));
791    tok = ILL_QUOTED;
792    goto _return_tok_;
793}
794
795
796/*
797 * Return the next non-blank and non-comment character.
798 * pligne, cc, ctype are maintained as in lex_an()
799 *
800 * Return values:
801 *	NO_TOKEN	no blank space was skipped
802 *	BLANK_SPACE	some blank space was skipped
803 *	ENDOFFILE	error (cc,ctype not updated)
804 */
805static int
806_skip_blanks(stream_id nst, syntax_desc *sd, unsigned char **p_pligne, int *p_cc, int *p_ctype)
807{
808    unsigned char	*pligne = *p_pligne;
809    int			ret = NO_TOKEN;
810    int			cc = *p_cc;
811    int			ctype = *p_ctype;
812    int			cc2, ctype2, depth;
813
814    for(;;)
815    {
816	switch (ctype)
817	{
818	case NL:
819	    StreamLine(nst)++;
820	    /* fall through */
821	case BS:
822	    ret = BLANK_SPACE;
823	    break;
824
825	case CM:			/* comment until end of line */
826	    ret = BLANK_SPACE;
827	    do {
828		Get_Ch_Class(cc,ctype);
829	    } while (ctype != NL  &&  ctype != RE);
830	    continue;
831
832	case CM1:			/* C-style comment */
833	    Get_Ch_Class(cc2,ctype2);	/* lookahead */
834	    if (ctype2 == CM2)
835	    {
836		ret = BLANK_SPACE;		/* it's definitely a comment */
837		Get_Ch_Class(cc,ctype);
838		for (depth = 1; depth > 0; )
839		{
840		    switch (ctype)
841		    {
842		    case RE:		/* EOF within comment not allowed */
843			*p_pligne = pligne;
844			return ENDOFFILE;
845
846		    case NL:		/* don't forget to count lines */
847			StreamLine(nst)++;
848			break;
849
850		    case CM1:		/* possible nested comment */
851			if (sd->options & NESTED_COMMENTS)
852			{
853			    Get_Ch_Class(cc,ctype);
854			    if (ctype != CM2)
855				continue;
856			    depth++;
857			}
858			break;
859
860		    case CM2:		/* possible end of comment */
861			Get_Ch_Class(cc,ctype);
862			if (ctype != CM1)
863			    continue;
864			depth--;
865			break;
866		    }
867		    Get_Ch_Class(cc,ctype);
868		}
869		continue;		/* end of comment */
870	    }
871	    Backup_(cc2, 1);
872	    /* no comment, fall through */
873
874	default:
875	    *p_cc = cc;
876	    *p_ctype = ctype;
877	    *p_pligne = pligne;
878	    return ret;
879	}
880	Get_Ch_Class(cc,ctype);
881    }
882}
883
884
885/*
886 * Check if an atom needs to be quoted. This is called from the
887 * write_atom() routine. Return values:
888 *	IDENTIFIER - no quotes needed
889 *	QIDENTIFIER - quotes needed
890 *	BAR - may need quotes (all arities)
891 *	EOCL - dot may need quotes (all arities)
892 *	COMMA - may need quotes (only returned for ,/2)
893 */
894int
895ec_need_quotes(dident d, syntax_desc *sd)
896{
897    register unsigned char	*name = (unsigned char *) DidName(d);
898    register int		rest = (int) DidLength(d);
899    register int		c;
900
901    if (rest-- == 0)
902	return QIDENTIFIER;
903
904    switch (sd->char_class[c = *name++])
905    {
906    case LC:			/* atoms starting with lower case	*/
907	while (rest--)
908	{
909	    c = *name++;
910	    if (!Alphanum(sd->char_class[c]))
911		return QIDENTIFIER;
912	}
913	return IDENTIFIER;
914
915    case SY:			/* symbol atoms: . may need quotes	*/
916	if (c == '.' && rest == 0)
917	    return EOCL;
918	/* else fall through */
919    case ES:
920    case CM2:
921	while (rest--)
922	{
923	    c = *name++;
924_need_quotes1_:
925	    switch (sd->char_class[c])
926	    {
927	    case CM1:
928	    case CM2:
929	    case ES:
930	    case SY:
931		break;
932	    default:
933		return QIDENTIFIER;
934	    }
935	}
936	return IDENTIFIER;
937
938    case CM1:			/* begin of comment must be quoted	*/
939	if (rest--)
940	{
941	    c = *name++;
942	    if (sd->char_class[c] == CM2)
943		return QIDENTIFIER;
944	    else
945		goto _need_quotes1_;
946	}
947	else return IDENTIFIER;
948
949    case DS:
950	switch (c)
951	{
952	case '{':		/* {} needs no quotes	*/
953	    if (rest == 1 && *name == '}')
954		return IDENTIFIER;
955	    else return QIDENTIFIER;
956	case '[':		/* [] needs no quotes	*/
957	    if (rest == 1 && *name == ']')
958		return IDENTIFIER;
959	    else return QIDENTIFIER;
960	case ',':		/* ,/2 sometimes needs no quotes	*/
961	    if (d == d_.comma)
962		return COMMA;
963	    else return QIDENTIFIER;
964	case '|':		/* | needs quotes only inside lists	*/
965	    if (rest == 0)
966		return BAR;
967	    else return QIDENTIFIER;
968	default:
969	    return QIDENTIFIER;
970	}
971
972    case SL:
973	if (rest == 0)
974	    return IDENTIFIER;	/* ! and ; don't need quotes	*/
975	else return QIDENTIFIER;
976
977    case TS:
978	return QIDENTIFIER;
979    }
980    return QIDENTIFIER;
981}
982
983
984static unsigned char *
985_extend_lex_aux(stream_id nst)
986{
987    register long		n = StreamLexSize(nst);
988
989    StreamLexAux(nst) = (unsigned char *) hg_resize((generic_ptr) (StreamLexAux(nst)), (int)(n + n));
990    StreamLexSize(nst) = n + n;
991    return StreamLexAux(nst) + n;
992}
993
994#ifdef FUNCTOR_COMPLETION
995#if defined(HAVE_READLINE)
996/*ARGSUSED*/
997static char **
998_complete_predicate(char *text, int start, int end)
999{
1000    char		**matches;
1001    register char	*s;
1002    int			i = 0;
1003    extern char		**completion_matches();
1004    extern char		*rl_line_buffer;
1005
1006    matches = (char **)NULL;
1007
1008    /* strip spaces so that we know if we match a predicate or not */
1009    for (s = rl_line_buffer; (*s == ' ' || *s == '\t'); s++)
1010	i++;
1011    if (i == start)
1012	completion_start = 0;
1013    else {
1014	completion_start = start;
1015	for (s = rl_line_buffer + start - 1; s >= rl_line_buffer; s--) {
1016	    if (*s == '[')
1017		return matches;		/* matching filenames */
1018	    else if (*s != ' ' && *s != '\t' && *s != '\'' && *s != '"')
1019		break;
1020	}
1021    }
1022    matches = completion_matches(text, _find_matching_predicate);
1023    return matches;
1024}
1025
1026static char *
1027_find_matching_predicate(char *string, int state)
1028{
1029    char		*s1, *s2;
1030    extern char		*sprintf();
1031    int			search_arity;
1032    int			length;
1033
1034    if (state == 0) {
1035	completion_idx = 0;
1036	completion_length = strlen(string);
1037    }
1038    if (string[completion_length - 1] == '/') {
1039	/* We have the whole name and search for arity only */
1040	search_arity = 1;
1041	length = completion_length - 1;
1042    } else {
1043	search_arity = 0;
1044	length = completion_length;
1045    }
1046    while (next_functor(&completion_idx, &completion_dip))
1047    {
1048	if (s1 = DidName(completion_dip))
1049	{
1050	    if (strncmp(string, s1, length) == 0 &&
1051		((!search_arity && completion_start) ||
1052		    visible_procedure(completion_dip,
1053			d_.default_module, tdict, PRI_DONTIMPORT)))
1054	    {
1055		if (search_arity) {
1056		    if (strlen(s1) == length) {
1057			s2 = (char *) hp_alloc(length + 4);
1058			(void) strcpy(s2, s1);
1059			s2[length] = '/';
1060			(void) sprintf(s2 + length + 1,
1061			    "%d", DidArity(completion_dip));
1062			return s2;
1063		    }
1064		} else {
1065		    s2 = (char *) hp_alloc(strlen(s1) + 1);
1066		    (void) strcpy(s2, s1);
1067		    return s2;
1068		}
1069	    }
1070	}
1071    }
1072    Set_Bip_Error(0);
1073    return (char *) 0;
1074}
1075#else
1076
1077static int
1078_prefix_length(char *s1, char *s2)
1079{
1080    register char	*p = s1;
1081
1082    while (*s1++ == *s2++)
1083	;
1084    return (s1 - p) - 1;
1085}
1086
1087static int
1088_complete(unsigned char *string, unsigned char *end, char **out)
1089{
1090    int				length = 0;
1091    register unsigned char	*s1;
1092    register unsigned char	*s2;
1093    char			*found_string;
1094    int				match;
1095    int				idx = 0;
1096    dident			dip;
1097
1098    while (next_functor(&idx, &dip))
1099    {
1100	if (s1 = (unsigned char *) DidName(dip))
1101	{
1102	    s2 = string;
1103	    while (s2 < end)
1104		if (*s2 != *s1++)
1105		    break;
1106		else
1107		    s2++;
1108	    if (s2 == end)		/* Found one */
1109	    {
1110		if (!length)
1111		{
1112		    found_string = DidName(dip);
1113		    length = strlen(found_string);
1114		}
1115		else if (DidName(dip) != found_string)
1116		{
1117		    match = _prefix_length(DidName(dip), found_string);
1118		    if (match < length)
1119			length = match;
1120		}
1121	    }
1122	}
1123    }
1124    if (length > end - string) {
1125	    *out = found_string + (end - string);
1126	    return length - (end - string);
1127    }
1128    else
1129	return 0;
1130}
1131
1132/*ARGSUSED*/
1133static void
1134_find_matching_atom(
1135	unsigned char	*end,
1136	stream_id	nst,
1137	unsigned char	**pw,
1138	unsigned char	**stop)
1139{
1140    char	*p;
1141    int		sl;
1142
1143    if (sl = _complete(StreamLexAux(nst), end, &p))
1144    {
1145#ifdef HAVE_PUSHBACK
1146	while (sl--)
1147	    pushback_char((int) (StreamUnit(nst)), p++);
1148#else
1149	(void) write((int) (StreamUnit(nst)), p, sl);
1150	if (*pw + sl >= *stop) {
1151	    Extend_Lex_Aux(nst, *pw, *stop);
1152	}
1153	while (sl--)
1154	    *(*pw)++ = *p++;
1155#endif
1156    }
1157    else
1158	(void) write((int) (StreamUnit(nst)), "\007", 1);
1159    StreamMode(nst) |= DONT_PROMPT;
1160}
1161#endif
1162#endif
1163
1164
1165syntax_desc *
1166copy_syntax_desc(syntax_desc *sd)
1167{
1168    syntax_desc		*newsd;
1169
1170    newsd = (syntax_desc *) hg_alloc_size(sizeof(syntax_desc));
1171    *newsd = *sd;
1172    return newsd;
1173}
1174
1175/*ARGSUSED*/
1176static int
1177p_copy_syntax(value vfrom, type tfrom, value vto, type tto)
1178{
1179    module_item		*from, *to;
1180
1181    from = ModuleItem(vfrom.did);
1182    to = ModuleItem(vto.did);
1183
1184    hg_free_size((generic_ptr) to->syntax, sizeof(syntax_desc));
1185    to->syntax = copy_syntax_desc(from->syntax);
1186    Succeed_;
1187}
1188
1189/*
1190 * get_chtab_(+Character, ?CharacterClass, Module)
1191 */
1192static int
1193p_get_chtab(value v1, type t1, value v2, type t2, value vm, type tm)
1194{
1195    Check_Integer(t1);
1196    Check_Output_Atom(t2);
1197    Check_Module_And_Access(vm, tm)
1198    if (v1.nint < 0 || v1.nint > 255)
1199    {
1200	Bip_Error(RANGE_ERROR);
1201    }
1202    Return_Unify_Atom(v2,t2,(dident)chname_[ModuleSyntax(vm.did)->char_class[(unsigned char)v1.nint]]);
1203}
1204
1205/*
1206 * set_chtab_(+Character, +CharacterClass, Module)
1207 */
1208static int
1209p_set_chtab(value v1, type t1, value v2, type t2, value vm, type tm)
1210{
1211    unsigned char	c;	/* to hold the concerned character */
1212    int			new_cc;
1213    syntax_desc		*sd;
1214
1215    Check_Integer(t1);
1216    Check_Atom(t2);
1217    Check_Module_And_Access(vm, tm)
1218    if (v1.nint < 0 || v1.nint > 255)
1219    {
1220	Bip_Error(RANGE_ERROR);
1221    }
1222    c = (unsigned char) v1.nint;
1223    sd = ModuleSyntax(vm.did);
1224
1225    /* Then try to find the character class among the known ones */
1226    for(new_cc = 1; new_cc <= NBCH && v2.did != chname_[new_cc]; new_cc++)
1227	;
1228    if (new_cc > NBCH) { Bip_Error(RANGE_ERROR) }	/* Not found */
1229
1230    /* Check if we are redefining the current AQ, SQ or ES character.
1231     * For writing, we always need an AQ and SQ, hence they may only be
1232     * redefined if there is an alternative one that can be used instead.
1233     * Having no ES character is allowed.
1234     */
1235    if ((unsigned char)new_cc != sd->char_class[c]
1236     && (sd->current_sq_char == c || sd->current_aq_char == c ||
1237	 sd->current_escape == c  || sd->current_ul_char == c))
1238    {	int j;
1239	unsigned char cc = sd->char_class[c];
1240
1241	for(j = 0; j <= 255; j++)	/* scan through all characters */
1242	{
1243	    if (sd->char_class[j] == cc  &&  (int)c != j)
1244	    {
1245		switch(cc)		/* found an alternative character j */
1246		{
1247		case AQ:	sd->current_aq_char = j; break;
1248		case SQ:	sd->current_sq_char = j; break;
1249		case UL:	sd->current_ul_char = j; break;
1250		case ES:	sd->current_escape = j; break;
1251		}
1252		break;
1253	    }
1254	}
1255	if (j > 255)
1256	    if (cc == ES)
1257		sd->current_escape = -1; /* no longer an ES character	*/
1258	    else
1259	    {
1260		Bip_Error(ONE_SQ_AQ)	/* these quotes are needed	*/
1261	    }
1262    }
1263
1264    sd->char_class[c] = (unsigned char) new_cc; /* now redefine the character */
1265    switch(new_cc)			/* might be the new current_...	*/
1266    {
1267    case AQ:    sd->current_aq_char = c; break;
1268    case SQ:    sd->current_sq_char = c; break;
1269    case ES:    sd->current_escape = c; break;
1270    case UL:    sd->current_ul_char = c; break;
1271    }
1272    Succeed_;
1273}
1274
1275
1276/*
1277 * get_syntax_(?Flag, Remember, Module) - backtrack over syntax flags
1278 *
1279 * internal use only !
1280 */
1281/*ARGSUSED*/
1282static int
1283p_get_syntax(value val1, type tag1, value val2, type tag2, value vm, type tm)
1284{
1285    value	vi;
1286    int		syntax;
1287
1288    /* no check on tag1 ! */
1289    /* Check_Integer(tag2); not needed */
1290
1291    syntax = ModuleSyntax(vm.did)->options;
1292    vi.nint = val2.nint;
1293    while (vi.nint < SYNTAX_FLAGS)
1294    {
1295	if (syntax & (1 << vi.nint++))
1296	{
1297	    Remember(2, vi, tag2);
1298	    Return_Unify_Atom(val1, tag1, syntax_flags[vi.nint-1]);
1299	}
1300    }
1301    Cut_External;
1302    Fail_;
1303}
1304
1305/*	read_token_(Stream, Token, Class, Module)	*/
1306/*ARGSUSED*/
1307static int
1308p_read_token_(value vs, type ts, value v, type t, value vc, type tc, value vm, type tm)
1309{
1310    int		res;
1311    char	*s;
1312    token_desc	token;
1313    stream_id	nst = get_stream_id(vs,ts, SREAD, &res);
1314    register word len;
1315    syntax_desc	*sd = ModuleSyntax(vm.did);
1316    dident	tname;
1317    Prepare_Requests;
1318
1319    if (!IsRef(t) && IsCompound(t))
1320    {
1321	Bip_Error(TYPE_ERROR)
1322    }
1323    if (nst == NO_STREAM)
1324    {
1325	Bip_Error(res)
1326    }
1327    if (!IsReadStream(nst))
1328    {
1329	Bip_Error(STREAM_MODE);
1330    }
1331    Check_Module_And_Access(vm, tm)
1332    if (StreamMode(nst) & REPROMPT_ONLY)
1333	StreamMode(nst) |= DONT_PROMPT;
1334
1335    (void) lex_an(nst, sd, &token);
1336    tname = LexError(token.class) ? d_.err : tname_[token.class];
1337    switch(token.class)
1338    {
1339	case COMMA:
1340	case EOCL:
1341	    break;
1342
1343	case REFERENCE:
1344	case UREFERENCE:
1345	case STRING:
1346	case CODES:
1347	case CHARS:
1348	case BAR:
1349	default:		/* LexError() */
1350	    len = token.term.val.nint;
1351	    Make_Stack_String(len, token.term.val, s)
1352	    Copy_Bytes(s, token.string, len + 1);
1353 	    token.term.tag.kernel = TSTRG;
1354            break;
1355
1356	case ENDOFFILE:		/* we don't have the string */
1357	    Make_Stack_String(0, token.term.val, s)
1358	    *s = 0;
1359 	    token.term.tag.kernel = TSTRG;
1360            break;
1361
1362	case SOLO:
1363	case CLOSING_SOLO:
1364	case SPACE_SOLO:
1365	{
1366	    char c = (char) token.term.val.nint;
1367	    Make_Stack_String(1, token.term.val, s)
1368	    s[0] = c;
1369	    s[1] = 0;
1370 	    token.term.tag.kernel = TSTRG;
1371            break;
1372	}
1373
1374	case NUMBER:
1375	case SPACE_NUMBER:
1376	    if (IsInterval(token.term.tag))
1377	    {
1378		Unmark_Interval_Raw(token.term.val.ptr);
1379	    }
1380	    tname = tag_desc[tag_desc[TagType(token.term.tag)].super].type_name;
1381	    break;
1382
1383	case IDENTIFIER:
1384	case QIDENTIFIER:
1385	    token.term.val.did = enter_dict_n(token.string, token.term.val.nint, 0);
1386	    token.term.tag.kernel = token.term.val.did == d_.nil ? TNIL : TDICT;
1387	    break;
1388
1389	case EOI:
1390	    if (StreamMode(nst) & MEOF ) {
1391		Bip_Error(IsSoftEofStream(nst) ? PEOF : READ_PAST_EOF);
1392	    }
1393	    else
1394		StreamMode(nst) |= MEOF;
1395	    Bip_Error(PEOF);
1396    }
1397    Request_Unify_Pw(v, t, token.term.val, token.term.tag);
1398    Request_Unify_Atom(vc, tc, tname);
1399    Return_Unify;
1400}
1401
1402
1403/*** the subsequent BIPs fail on error and set the global variable ***/
1404
1405#undef Bip_Error
1406#define Bip_Error(N) Bip_Error_Fail(N)
1407
1408/*
1409 * set_syntax(+flag, +val) - set or reset a syntax flag, fails on error
1410 *
1411 * internal use only !
1412 */
1413/*ARGSUSED*/
1414static int
1415p_set_syntax(value val1, type tag1, value val2, type tag2, value vm, type tm)
1416{
1417    int		i, flag;
1418    syntax_desc	*sd;
1419
1420    Check_Atom(tag1);
1421    Check_Atom(tag2);
1422    sd = ModuleSyntax(vm.did);
1423
1424    for (i=0, flag=1; i < SYNTAX_FLAGS; i++, flag <<= 1)
1425    {
1426	if (val1.did == syntax_flags[i])
1427	{
1428	    if (val2.did == d_.on)
1429		sd->options |= flag;
1430	    else if (val2.did == d_.off)
1431		sd->options &= ~flag;
1432	    else { Bip_Error(RANGE_ERROR); }
1433	    Succeed_;
1434	}
1435    }
1436    Bip_Error(RANGE_ERROR);
1437}
1438
1439
1440#ifdef HAVE_INFINITY
1441extern double infinity();
1442#else
1443#ifdef HUGE_VAL
1444#define infinity() HUGE_VAL
1445#else
1446#ifdef HUGE
1447#define infinity() HUGE
1448#else
1449#define infinity() 1.0e310
1450#endif
1451#endif
1452#endif
1453
1454/*
1455 * char *string_to_number(start, result, nst, sd)
1456 *
1457 * 	Auxiliary function used to convert a string (pointed to by start)
1458 *	to a number. The result is a prolog word in *result
1459 *	and is either a TINT, TBIG, TRAT, TDBL or TIVL.
1460 *	If the tag is TEND there has been a conversion error.
1461 *	The return value is the pointer to the next character after
1462 *	the number.
1463 *	The function can be used both for parsing from a stream (nst)
1464 *	or for parsing a string (when nst == NULL).
1465 *	StreamPtr is updated according to the return value.
1466 *	This function is independent of character classes, except
1467 *      for the escape sequences.
1468 *	For better backward compatibility, based integers are
1469 *	not parsed as bignums (otherwise 16'ffffffff would be a bignum),
1470 *	unless the based_bignums syntax option is active.
1471 */
1472
1473#define Init_S2N() \
1474    if (nst) { \
1475	aux = StreamLexAux(nst); \
1476	stop = StreamLexAux(nst) + StreamLexSize(nst); \
1477    }
1478
1479#define Reset_Start() \
1480	if (nst) aux = StreamLexAux(nst); \
1481	else start = (char *) t;
1482
1483/* up to three characters of backup are needed, e.g. in "3e+a" */
1484#define Push_Back() \
1485	--t; \
1486	if (nst) { \
1487	    *(--aux) = 0; \
1488	}
1489
1490#define Get_Ch(c) \
1491	c = *t++; \
1492	if (nst) { \
1493	    if (!c) { \
1494		StreamPtr(nst) = t-1; \
1495		(void) fill_buffer(nst); \
1496		t = StreamPtr(nst); \
1497		c = *t++; \
1498	    } \
1499	    if (aux == stop) { Extend_Lex_Aux(nst, aux, stop) } \
1500	    *aux++ = c; \
1501	}
1502
1503#define NEG	1
1504#define BIG	2
1505#define FLOAT	4
1506#define IVL	8
1507#define PRECISE	16
1508
1509char *
1510string_to_number(char *start, pword *result, stream_id nst, syntax_desc *sd)
1511{
1512    unsigned register char *t;		/* next character to read */
1513    unsigned register char *aux;	/* next location in LexAux */
1514    unsigned char *stop;		/* end of LexAux */
1515    register int c;			/* current character */
1516    int	flags = 0;			/* to remember established facts */
1517    int base = 10;			/* radix for number reading */
1518    register uword iresult = 0;		/* accumulator for integer value */
1519    double f, low_f;			/* the float result */
1520    int float_digits = 0;
1521    int syntax;
1522
1523    Init_S2N();
1524    t = (unsigned char *) start;
1525    if (!sd) sd = default_syntax;
1526    syntax = sd->options;
1527
1528_start_:
1529    Get_Ch(c)
1530    switch(c) { 		/* check for optional sign */
1531    case '-':
1532	flags |= NEG;
1533        /*fall through*/
1534    case '+':
1535	Get_Ch(c)
1536        if (syntax & BLANK_AFTER_SIGN) {        /* allow optional space? */
1537            while (sd->char_class[c] == BS) {
1538                Get_Ch(c)
1539            }
1540        }
1541    }
1542
1543    if (!isdigit(c))		/* read digits */
1544	goto return_err;	/* can't happen in the lexer */
1545
1546    do {
1547	++float_digits;
1548	if (!(flags & BIG))
1549	{
1550	    c -= '0';
1551	    if (iresult <= MAX_S_WORD/10 && ((iresult *= 10) <= MAX_S_WORD - c))
1552		iresult += c;
1553	    else flags |= BIG;	/*  word overflow */
1554	}
1555	Get_Ch(c)
1556    } while (isdigit(c));
1557
1558    if (c == '\'') {			/* based integer */
1559	if ((flags & BIG) || iresult < 0 || iresult > 36
1560            || (iresult < 10 && float_digits > 1) || float_digits > 2)
1561	{
1562	    goto return_int;
1563	}
1564	base = iresult;
1565	if (base == 0)			/* character code */
1566	{
1567	    int max_no, max_lc, max_uc;
1568            Get_Ch(c);
1569            switch(sd->char_class[c]) {
1570            case ES:
1571                Get_Ch(c);
1572                switch(sd->char_class[c]) {
1573                case AQ:        /* 0'\' */
1574                case SQ:        /* 0'\" or 0'\` */
1575                case LQ:        /* 0'\" */
1576                case CQ:        /* 0'\" */
1577                case ES:        /* 0'\\ */
1578                    break;
1579
1580                case LC:        /* 0'\a ... 0'\v */
1581                    switch(c) {
1582                    case 'a': c = 0007; goto _return_c_;	/* alert */
1583                    case 'b': c = '\b'; goto _return_c_;	/* backspace */
1584                    case 't': c = '\t'; goto _return_c_;	/* tab */
1585                    case 'n': c = '\n'; goto _return_c_;	/* newline */
1586                    case 'v': c = 0013; goto _return_c_;	/* vertical tab */
1587                    case 'f': c = '\f'; goto _return_c_;	/* form feed */
1588                    case 'r': c = '\r'; goto _return_c_;	/* return */
1589                    case 'e':                   /* escape */
1590                        if (syntax & ISO_ESCAPES) goto return_int3;
1591                        c = 0033; goto _return_c_;
1592                    case 'd':                   /* delete */
1593                        if (syntax & ISO_ESCAPES) goto return_int3;
1594                        c = 0177; goto _return_c_;
1595                    case 's':                   /* space */
1596                        if (syntax & ISO_ESCAPES) goto return_int3;
1597                        c = ' '; goto _return_c_;
1598		    case 'x':                   /* ISO hex constant */
1599			base = 16; max_no = '9'; max_lc = 'f'; max_uc = 'F';
1600			Get_Ch(c)
1601			if (sd->char_class[c] != ES)
1602			    goto _iso_numeric_escape_;
1603			Push_Back();		/* the premature ES */
1604			break;
1605                    }
1606		    goto _unknown_escape_;
1607
1608		case N:
1609                    if (!(syntax & ISO_ESCAPES)) goto _unknown_escape_;
1610		    base = 8; max_no = '7'; max_lc = 0; max_uc = 0;
1611_iso_numeric_escape_:
1612		    /* because of unlimited length of this sequence, we cannot push
1613		     * it all back on error - we leave legal prefix consumed */
1614		    for (iresult=0;;) {
1615			if (c>='0' && c<=max_no) c -= '0';
1616			else if (c>='a' && c<=max_lc) c = c - 'a' + 10;
1617			else if (c>='A' && c<=max_uc) c = c - 'A' + 10;
1618			else if (sd->char_class[c] == ES)
1619			    break;
1620			else {
1621			    Push_Back();		/* the bad char */
1622			    goto return_err;
1623			}
1624			if ((unsigned )iresult <= MAX_CHAR_CODE/base &&
1625				((unsigned ) (iresult * base) <= MAX_CHAR_CODE - c))
1626			    iresult = iresult * base + c;
1627			else goto return_err;	/* overflow */
1628			Get_Ch(c)
1629		    }
1630		    result->val.nint = (word) iresult;
1631		    result->tag.kernel = TINT;
1632		    goto return_ok;
1633
1634                default:        /* unrecognised 0'\? escape sequence */
1635_unknown_escape_:
1636                    if (syntax & ISO_ESCAPES) goto return_int3;
1637                    /* backward comp: allow plain 0'\ for backslash */
1638                    Push_Back();		/* the bad char */
1639                    c = '\\';
1640                    break;
1641                }
1642                break;
1643
1644            case BS:
1645		if (c == ' ') break;
1646		/*fall through*/
1647            case NL:    /* 0'<layout> not allowed in ISO */
1648                if (syntax & ISO_ESCAPES) goto return_int2; /* (iresult) */
1649                break;
1650
1651            case AQ:    /* 0'' */
1652                if (syntax & ISO_ESCAPES && syntax & DOUBLED_QUOTE_IS_QUOTE) {
1653                    Get_Ch(c);
1654                    if (sd->char_class[c] != AQ) goto return_int3;
1655                    /* 0''' */
1656                }
1657                break;
1658
1659            case SQ:    /* 0'" or 0'` */
1660            case LQ:    /* 0'" */
1661            case CQ:    /* 0'" */
1662                break;
1663            }
1664_return_c_:
1665	    result->val.nint = (word) c;
1666	    result->tag.kernel = TINT;
1667	    goto return_ok;
1668	}
1669	if (syntax & ISO_BASE_PREFIX)
1670	    goto return_int;	/* (flags, iresult) */
1671
1672_based_number_:				/* (base,iresult) */
1673	{
1674	    int max_no = base < 10 ? '0'+base-1 : '9';
1675	    int max_lc = 'a' + base-11;
1676	    int max_uc = 'A' + base-11;
1677	    Reset_Start()
1678	    Get_Ch(c)
1679	    if (!(c>='0' && c<=max_no || c>='a' && c<=max_lc || c>='A' && c<=max_uc))
1680	    {
1681		Push_Back();		/* the bad digit */
1682		goto return_int;	/* (flags, iresult) */
1683	    }
1684	    for (iresult=0;;) {
1685		if (c>='0' && c<=max_no) c -= '0';
1686		else if (c>='a' && c<=max_lc) c = c - 'a' + 10;
1687		else if (c>='A' && c<=max_uc) c = c - 'A' + 10;
1688		else break;
1689		if (!(flags & BIG))
1690		{
1691		    if (iresult <= MAX_U_WORD/base &&
1692			    (iresult * base <= MAX_U_WORD - c))
1693			iresult = iresult * base + c;
1694		    else flags |= BIG;	/* word overflow */
1695		}
1696		Get_Ch(c)
1697	    }
1698	    if (syntax & BASED_BIGNUMS)
1699	    {
1700	      if (!(flags & BIG) && iresult > MAX_S_WORD)
1701		    flags |= BIG;
1702	    }
1703	    else if (flags & BIG) {
1704		Push_Back();		/* the delimiter */
1705		goto return_err;
1706	    }
1707	    goto return_int;
1708	}
1709    }
1710    else if(c == '.')			/* could be a float */
1711    {
1712	int first;
1713	Get_Ch(c)			/* first after point */
1714	if (!isdigit(c))
1715	{
1716	    Push_Back();		/* the non-digit */
1717            goto return_int;		/* it was no decimal point */
1718	}
1719	++float_digits;
1720	flags |= FLOAT;			/* definitely a float */
1721	first = c;
1722	Get_Ch(c)
1723	if (!isdigit(c)) {		/* only one fractional digit */
1724	    if (first == '0' || first == '5')
1725		flags |= PRECISE;
1726	} else {
1727	    do {
1728		++float_digits;
1729		Get_Ch(c)		/* read remaining digits */
1730	    } while (isdigit(c));
1731	}
1732	if (c == 'e' || c == 'E')	/* exponent is now optional */
1733	    flags &= ~PRECISE;		/* conservative assumption */
1734	else if (c == 'I')		/* check for Inf */
1735	{
1736	    Get_Ch(c)
1737	    if (c == 'n')
1738	    {
1739		Get_Ch(c)
1740		if (c == 'f') goto return_infinity;
1741		Push_Back();		/* the f position*/
1742	    }
1743	    Push_Back();		/* the n position*/
1744	    goto return_real;
1745	}
1746	else if (c == 'N')		/* check for NaN */
1747	{
1748	    Get_Ch(c)
1749	    if (c == 'a')
1750	    {
1751		Get_Ch(c)
1752		if (c == 'N') goto return_nan;
1753		Push_Back();		/* the N position*/
1754	    }
1755	    Push_Back();		/* the a position*/
1756	    goto return_real;
1757	}
1758	else				/* no exponent */
1759	    goto return_real;
1760	/* go read exponent */
1761    }
1762    else if ((c == 'e' || c == 'E') && !(syntax & FLOAT_NEEDS_POINT))
1763    	;
1764    else if (c == '_')			/* could be a rational */
1765    {
1766	Get_Ch(c)
1767	if (!isdigit(c))
1768	{
1769#ifdef ALT_RAT_SYNTAX
1770	    if (c != '/')		/* allow for an optional '/' */
1771	    {
1772		Push_Back();		/* the non-digit */
1773		goto return_int;	/* just an integer */
1774	    }
1775	    Get_Ch(c)
1776#endif
1777	    if (!isdigit(c))
1778	    {
1779		Push_Back();		/* the non-digit */
1780#ifdef ALT_RAT_SYNTAX
1781		Push_Back();		/* the '/' */
1782#endif
1783		goto return_int;	/* just an integer */
1784	    }
1785	}
1786	do {				/* definitely a rational */
1787	    Get_Ch(c)
1788	} while (isdigit(c));
1789	goto return_rat;
1790    }
1791    else if (syntax&ISO_BASE_PREFIX && iresult==0 && float_digits==1)
1792    {
1793	switch (c) {
1794	case 'b': base =  2; goto _based_number_; /* (base,iresult) */
1795	case 'o': base =  8; goto _based_number_; /* (base,iresult) */
1796	case 'x': base = 16; goto _based_number_; /* (base,iresult) */
1797	}
1798	goto return_int;		/* integer or bignum */
1799    }
1800    else
1801	goto return_int;		/* integer or bignum */
1802
1803    Get_Ch(c)				/* read exponent */
1804    if (c == '-' || c == '+')		/* optional exponent sign */
1805    {
1806	Get_Ch(c)
1807	if (!isdigit(c))
1808	{
1809	    Push_Back();		/* the non-digit */
1810	    Push_Back();		/* the sign */
1811	    if (flags & FLOAT)
1812		goto return_real;
1813	    else
1814		goto return_int;
1815	}
1816    }
1817    else if (!isdigit(c))		/* one or more digits */
1818    {
1819	Push_Back();			/* the non-digit */
1820	if (flags & FLOAT)
1821	    goto return_real;
1822	else
1823	    goto return_int;
1824    }
1825    /* flags |= FLOAT;			definitely a float */
1826    do {
1827	Get_Ch(c)
1828    } while (isdigit(c));
1829
1830return_real:				/* we have a valid real */
1831    Push_Back();			/* pushback the delimiter */
1832    if (nst) start = (char *) StreamLexAux(nst);
1833    f = atof(start);
1834#ifdef ATOF_NEGZERO_BUG
1835    /* some versions of atof() don't properly create negative zeros */
1836    if (f == 0.0  &&  1.0/f > 0.0  &&  flags & NEG) f = -f;
1837#endif
1838
1839return_f:				/* f */
1840
1841    if (flags & IVL)			/* second half of interval? */
1842    {
1843	if (!GoodFloat(f) || !GoodFloat(low_f))
1844	    goto return_err;
1845	/*
1846	 * When called from the lexer we allow to return an illformed (raw)
1847	 * interval (lwb > upb) because we don't see a possibly leading
1848	 * minus sign!
1849	 */
1850	if (!nst && low_f > f)
1851	    goto return_err;
1852	Make_Interval(result, low_f, f);
1853	if (nst)
1854	{
1855	    /* this flag is used in _ivl_chgsign() and
1856	     * reset in the parser or in read_token */
1857	    Mark_Interval_Raw(result->val.ptr);
1858	}
1859	goto return_ok;
1860    }
1861
1862    Get_Ch(c)				/* check for float interval separator */
1863    if (c == '_')
1864    {
1865	Get_Ch(c)
1866	if (c == '_')
1867	{
1868	    low_f = f;
1869	    Reset_Start()
1870	    flags = IVL;
1871	    goto _start_;		/* go read the second float */
1872	}
1873	Push_Back();			/* the non-underscore */
1874    }
1875    Push_Back();
1876
1877    if (syntax & FLOATS_AS_BREALS)
1878    {
1879	if (!GoodFloat(f))
1880	{
1881	    goto return_err;
1882	}
1883	if (!(flags & PRECISE) || float_digits > 15)
1884	{
1885	    low_f = ec_ieee_down(f);
1886	    f = ec_ieee_up(f);
1887	}
1888	else
1889	{
1890	    low_f = f;
1891	}
1892	Make_Interval(result, low_f, f);
1893    }
1894    else
1895    {
1896	Make_Double(result, f)
1897    }
1898
1899    goto return_ok;
1900
1901return_infinity:			/* we have an infinity */
1902    f = flags & NEG ? -infinity() : infinity();
1903    goto return_f;
1904
1905return_rat:				/* (start, base) */
1906    Push_Back();			/* pushback the delimiter */
1907    if (flags & IVL) goto return_err;
1908    if (nst) start = (char *) StreamLexAux(nst);
1909    if (tag_desc[TRAT].from_string(start, result, base) != PSUCCEED)
1910	goto return_err;
1911    goto return_ok;
1912
1913return_int3:
1914    Push_Back();
1915return_int2:
1916    Push_Back();
1917return_int:				/* (flags, iresult, start, base) */
1918    Push_Back();			/* pushback the delimiter */
1919    if (flags & IVL) goto return_err;
1920    if (flags & BIG)
1921    {
1922	if (nst) start = (char *) StreamLexAux(nst);
1923	if (tag_desc[TBIG].from_string(start, result, base) != PSUCCEED)
1924	    goto return_err;
1925    }
1926    else	/* integer */
1927    {
1928	result->val.nint = flags & NEG ? (word)-iresult : (word)iresult;
1929	result->tag.kernel = TINT;
1930    }
1931
1932return_ok:
1933    if (nst) StreamPtr(nst) = t;
1934    return (char *) t;
1935
1936return_nan:				/* we have a NaN */
1937    {
1938	ieee_double nan;
1939	if (nst) start = (char *) StreamLexAux(nst);
1940	nan.as_dbl = atof(start);
1941	nan.as_struct.mant1 |= 0x7FF00000;	/* change it into a NaN */
1942	/*
1943	 * Note that signaling NaNs are immediately turned into quiet NaNs
1944	 * here, usually by setting the top bit in the significand.
1945	 * E.g. 1.2NaN turns into 1.7NaN, nothing we can do about that.
1946	 */
1947	f = nan.as_dbl;
1948	if (!GoodFloat(f))	/* catch 1.0NaN, which is 1.0Inf */
1949	    goto return_f;
1950    }
1951
1952return_err:
1953    result->tag.kernel = TEND;
1954    if (nst)
1955	result->val.nint = aux - StreamLexAux(nst);
1956    if (nst) StreamPtr(nst) = t;
1957    return (char *) t;
1958}
1959
1960/* CAUTION: Bip_Error() is redefined to Bip_Error_Fail() */
1961