1/* BEGIN LICENSE BLOCK 2 * Version: CMPL 1.1 3 * 4 * The contents of this file are subject to the Cisco-style Mozilla Public 5 * License Version 1.1 (the "License"); you may not use this file except 6 * in compliance with the License. You may obtain a copy of the License 7 * at www.eclipse-clp.org/license. 8 * 9 * Software distributed under the License is distributed on an "AS IS" 10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 11 * the License for the specific language governing rights and limitations 12 * under the License. 13 * 14 * The Original Code is The ECLiPSe Constraint Logic Programming System. 15 * The Initial Developer of the Original Code is Cisco Systems, Inc. 16 * Portions created by the Initial Developer are 17 * Copyright (C) 1989-2006 Cisco Systems, Inc. All Rights Reserved. 18 * 19 * Contributor(s): 20 * 21 * END LICENSE BLOCK */ 22 23/* 24 * VERSION $Id: lex.c,v 1.17 2015/01/14 01:31:09 jschimpf Exp $ 25 */ 26 27/* 28 * IDENTIFICATION lex.c 29 * 30 * 31 * 32 * AUTHOR VERSION DATE REASON 33 * Jorge Bocca 34 * Pierre Dufresne 35 * Micha Meier 36 */ 37 38/* 39 * INCLUDES: 40 */ 41#include <math.h> 42 43#include "config.h" 44#include "sepia.h" /* to be able to have built-ins */ 45#include "types.h" /* to have the standard types (for BIP) */ 46#include "embed.h" 47#include "mem.h" /* to use in_dict and DidName */ 48#include "error.h" /* the BIP return values and standard errors */ 49#include "dict.h" 50#include "lex.h" /* the values returned lex_an */ 51#include "ec_io.h" 52#include "emu_export.h" 53#include "module.h" 54#include "property.h" /* for MODULE_PROP */ 55#include "os_support.h" 56#include "rounding_control.h" 57 58#ifdef HAVE_STRING_H 59#include <string.h> 60#else 61extern char *strcpy(); 62#endif 63 64#ifdef HAVE_CTYPE_H 65#include <ctype.h> 66#endif 67 68#ifdef STDC_HEADERS 69#include <stdlib.h> 70#else 71extern double atof(); 72#endif 73 74/* 75 * DEFINES: 76 */ 77 78/* The maximum numeric value of a character constant in a string. 79 * This should eventually be changed to depend on the stream encoding, 80 * e.g. 8-bit:255, utf8:2147483647 81 */ 82#define MAX_CHAR_CODE 255 83 84/* 85 * FUNCTOR_COMPLETION causes trouble in connection with testing for 86 * the .eco header in procedure.c. Also, it does not work on Windows. 87 */ 88#undef FUNCTOR_COMPLETION 89 90#ifdef FUNCTOR_COMPLETION 91#if defined(HAVE_READLINE) 92static char **_complete_predicate(char *text, int start, int end); 93static char *_find_matching_predicate(char *string, int state); 94#define Find_Matching_Atom(end, nst, pw, stop) 95#else 96static void _find_matching_atom(unsigned char *end, stream_id nst, unsigned char **pw, unsigned char **stop); 97#define Find_Matching_Atom(end, nst, pw, stop) _find_matching_atom(end, nst, &pw, &stop) 98#endif 99#endif 100 101#define Extend_Lex_Aux(nst, pw, stop) \ 102 pw = _extend_lex_aux(nst); \ 103 stop = StreamLexAux(nst) + StreamLexSize(nst); 104 105/* 106 * STATIC VARIABLE DEFINITIONS: 107 */ 108static int p_set_chtab(value v1, type t1, value v2, type t2, value vm, type tm), 109 p_get_chtab(value v1, type t1, value v2, type t2, value vm, type tm), 110 p_get_syntax(value val1, type tag1, value val2, type tag2, value vm, type tm), 111 p_set_syntax(value val1, type tag1, value val2, type tag2, value vm, type tm), 112 p_copy_syntax(value vfrom, type tfrom, value vto, type tto), 113 p_read_token_(value vs, type ts, value v, type t, value vc, type tc, value vm, type tm); 114 115static unsigned char *_extend_lex_aux(stream_id nst); 116static int _skip_blanks(stream_id nst, syntax_desc *sd, unsigned char **p_pligne, int *p_cc, int *p_ctype); 117 118static dident chname_[NBCH + 1]; 119static dident tname_[NBTK + 1]; 120static dident d_comma0_; 121static int completion_idx, 122 completion_length, 123 completion_start; 124static dident completion_dip; 125 126static syntax_desc default_syntax_desc = { 127/* Here is the initial type distribution: */ 128{ 129/* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */ 130 BS, BS, BS, BS, BS, BS, BS, BS, DL, BS, NL, BS, BS, BS, BS, BS, 131/* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */ 132 BS, KI, BS, BS, BS, KI, BS, BS, KI, BS, BS, BS, BS, BS, BS, BS, 133/* sp ! " # $ % & ' ( ) * + , - . / */ 134 BS, SL, SQ, SY, SY, CM, SY, AQ, DS, DS, CM2, SY, DS, SY, SY, CM1, 135/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ 136 N, N, N, N, N, N, N, N, N, N, SY, SL, SY, SY, SY, SY, 137/* @ A B C D E F G H I J K L M N O */ 138 SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, 139/* P Q R S T U V W X Y Z [ \ ] ^ _ */ 140 UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, DS, ES, DS, SY, UL, 141/* ` a b c d e f g h i j k l m n o */ 142 SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, 143/* p q r s t u v w x y z { | } ~ del */ 144 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, DS, DS, DS, SY, DL, 145/* 80 Latin-1 8f */ 146 BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, 147/* 90 9f */ 148 BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, 149/* a0 af */ 150 BS, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, 151/* b0 bf */ 152 SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, SY, 153/* c0 cf */ 154 UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, 155/* d0 df */ 156 UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC, 157/* e0 ef */ 158 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, 159/* f0 ff */ 160 LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC, 161/* EOI symbol */ 162 RE}, 163 NEWLINE_IN_QUOTES|NO_BLANKS, /* options */ 164 '"', /* sq */ 165 '\'', /* aq */ 166 '_', /* ul */ 167 '\\' /* escape */ 168}; 169 170 171static dident syntax_flags[SYNTAX_FLAGS]; /* the syntax flag names (dids) */ 172 173/* 174 * EXTERNAL VARIABLE DECLARATIONS: 175 */ 176 177/* 178 * EXTERNAL VARIABLE DEFINITIONS: 179 */ 180 181syntax_desc *default_syntax = &default_syntax_desc; 182 183/* 184 * FUNCTION DEFINITIONS: 185 */ 186 187void 188lex_init(int flags) /* initialization: setting the name of types */ 189{ 190 /* 191 * syntax_flags, chname_ and tname_ are read-only data 192 * and are replicated in every process 193 */ 194 195 /* the array must correspond to the flag order in syntax */ 196 syntax_flags[0] = in_dict("nl_in_quotes",0); 197 syntax_flags[1] = in_dict("limit_arg_precedence",0); 198 syntax_flags[2] = in_dict("no_blanks",0); 199 syntax_flags[3] = in_dict("bar_is_no_atom",0); 200 syntax_flags[4] = in_dict("blanks_in_nil",0); /* obsolete, no effect */ 201 syntax_flags[5] = in_dict("no_attributes",0); 202 syntax_flags[6] = in_dict("$VAR",0); /* obsolete */ 203 syntax_flags[7] = in_dict("nested_comments",0); 204 syntax_flags[8] = in_dict("based_bignums",0); 205 syntax_flags[9] = in_dict("dense_output",0); /* obsolete */ 206 syntax_flags[10] = in_dict("no_array_subscripts",0); 207 syntax_flags[11] = in_dict("doubled_quote_is_quote",0); 208 syntax_flags[12] = in_dict("iso_escapes",0); 209 syntax_flags[13] = in_dict("iso_base_prefix",0); 210 syntax_flags[14] = in_dict("read_floats_as_breals",0); 211 syntax_flags[15] = in_dict("no_curly_arguments",0); 212 syntax_flags[16] = in_dict("blanks_after_sign",0); 213 syntax_flags[17] = in_dict("var_functor_is_apply",0); 214 syntax_flags[18] = in_dict("atom_subscripts",0); 215 syntax_flags[19] = in_dict("general_subscripts",0); 216 syntax_flags[20] = in_dict("curly_args_as_list",0); 217 syntax_flags[21] = in_dict("float_needs_point",0); 218 syntax_flags[22] = in_dict("bar_is_semicolon",0); 219 syntax_flags[23] = in_dict("plus_is_no_sign",0); 220 syntax_flags[24] = in_dict("iso_restrictions",0); 221 222 default_syntax_desc.char_class[EOB_MARK] = RE; 223 224 chname_[0] = in_dict("unused",0); 225 chname_[UC] = in_dict("upper_case",0); 226 chname_[UL] = in_dict("underline",0); 227 chname_[LC] = in_dict("lower_case",0); 228 chname_[N] = in_dict("digit",0); 229 chname_[BS] = in_dict("blank_space",0); 230 chname_[NL] = in_dict("end_of_line",0); 231 chname_[AQ] = in_dict("atom_quote",0); 232 chname_[SQ] = in_dict("string_quote",0); 233 chname_[SL] = in_dict("solo",0); 234 chname_[DS] = in_dict("special",0); 235 chname_[CM] = in_dict("line_comment",0); 236 chname_[LQ] = in_dict("list_quote",0); /* should be codes_quote */ 237 chname_[CQ] = in_dict("chars_quote",0); 238 chname_[RA] = in_dict("radix",0); 239 chname_[AS] = in_dict("ascii",0); 240 chname_[TS] = in_dict("terminator",0); 241 chname_[ES] = in_dict("escape",0); 242 chname_[CM1] = in_dict("first_comment",0); 243 chname_[CM2] = in_dict("second_comment",0); 244 chname_[SY] = in_dict("symbol",0); 245 chname_[NBCH] = in_dict("null",0); 246 247 tname_[NO_TOKEN] = d_.err; 248 tname_[BLANK_SPACE] = d_.err; 249 tname_[EOI] = d_.eof; 250 tname_[EOCL] = in_dict("fullstop", 0); 251 tname_[IDENTIFIER] = d_.atom0; 252 tname_[QIDENTIFIER] = in_dict("quoted_atom", 0); 253 tname_[COMMA] = in_dict("comma", 0); 254 tname_[BAR] = 255 tname_[CLOSING_SOLO] = 256 tname_[SOLO] = in_dict("solo", 0); 257 tname_[SPACE_NUMBER] = 258 tname_[NUMBER] = in_dict("number", 0); 259 tname_[STRING] = d_.string0; 260 tname_[REFERENCE] = d_.var0; 261 tname_[UREFERENCE] = in_dict("anonymous", 0); 262 tname_[CODES] = in_dict("codes", 0); 263 tname_[CHARS] = in_dict("chars", 0); 264 tname_[SPACE_SOLO] = in_dict("open_par", 0); 265 266 d_comma0_ = in_dict(",", 0); 267#if defined(FUNCTOR_COMPLETION) && defined(HAVE_READLINE) 268 { 269 extern char ** (*rl_attempted_completion_function)(); 270 271 rl_attempted_completion_function = _complete_predicate; 272 } 273#endif 274 275 if (flags & INIT_SHARED) 276 { 277 (void) exported_built_in(in_dict("set_chtab_", 3), p_set_chtab, B_SAFE); 278 (void) exported_built_in(in_dict("get_chtab_", 3), p_get_chtab, B_UNSAFE|U_SIMPLE); 279 (void) local_built_in(in_dict("set_syntax_", 3), p_set_syntax, B_SAFE); 280 (void) b_built_in(in_dict("get_syntax_", 3), p_get_syntax, d_.kernel_sepia); 281 exported_built_in(in_dict("read_token_", 4), p_read_token_, B_UNSAFE|U_GROUND) -> mode = BoundArg(2, CONSTANT) | BoundArg(3, CONSTANT); 282 (void) local_built_in(in_dict("copy_syntax", 2), p_copy_syntax, B_SAFE); 283 } 284} 285 286#define Set_TokenString(s, l) \ 287 token->term.val.nint = (word)(l); \ 288 token->string = (char *) (s); 289 290 291/* up to three characters of backup are needed, e.g. in "3e+a" */ 292/* at eoi we don't advance pligne, so don't backup */ 293#define Backup_(c,n) \ 294 pligne -= (n) - ((c)==EOI_SYMBOL? 1: 0); 295 296#define EoB(nst) (StreamBuf(nst) + StreamCnt(nst)) 297 298#define Get_Chr(c) \ 299 c = *pligne++; \ 300 if ((c) == EOB_MARK && pligne > EoB(nst)) { \ 301 StreamPtr(nst) = pligne-1; \ 302 if (fill_buffer(nst) != PSUCCEED) { \ 303 pligne = StreamPtr(nst); \ 304 c = EOI_SYMBOL; \ 305 } else { \ 306 pligne = StreamPtr(nst); \ 307 c = *pligne++; \ 308 } \ 309 } 310 311#define Get_Ch_Class(c,t) \ 312 Get_Chr(c) \ 313 t = sd->char_class[c]; 314 315#ifdef FUNCTOR_COMPLETION 316#define Get_Ch_Class_And_Complete(c,t) \ 317 c = *pligne++; \ 318 if ((c) == EOB_MARK && pligne > EoB(nst)) { \ 319 if (IsTty(nst) && StreamCnt(nst) != StreamSize(nst) && \ 320 *(pw - 1) != '\n') { \ 321 Find_Matching_Atom(pw, nst, pw, stop); \ 322 } \ 323 StreamPtr(nst) = pligne-1; \ 324 if (fill_buffer(nst) != PSUCCEED) { \ 325 pligne = StreamPtr(nst); \ 326 c = EOI_SYMBOL; \ 327 } else { \ 328 pligne = StreamPtr(nst); \ 329 c = *pligne++; \ 330 } \ 331 } \ 332 t = sd->char_class[c]; 333#else 334#define Get_Ch_Class_And_Complete(c,t) \ 335 Get_Ch_Class(c,t) 336#endif 337 338 339/* 340 * Compute the file position of the next/last character read 341 */ 342#define CurrentOffset(nst,pligne) \ 343 (StreamOffset(nst) + (pligne - StreamBuf(nst))) 344 345#define PreviousOffset(nst,pligne,cc) \ 346 (CurrentOffset(nst,pligne) - BytesPerChar(cc)) 347 348#define BytesPerChar(cc) 1 349 350 351int 352lex_an( stream_id nst, /* in: stream to read from */ 353 syntax_desc *sd, /* in: syntax descriptor */ 354 token_desc *token /* out: token descriptor */ 355 ) /* returns: token or (negative) error code */ 356{ 357 unsigned char *pligne = StreamPtr(nst); 358 unsigned char *pw, *stop; 359 int tok = NO_TOKEN; 360 int cc; 361 int quote_char; 362 int ctype; 363 364 token->string = (char *) 0; 365 token->pos.file = StreamName(nst); 366 token->pos.line = StreamLine(nst); 367 token->pos.from = CurrentOffset(nst,pligne); 368 369 if (!pligne) { 370 tok = EOI; 371 goto _return_tok_; 372 } 373 374 Get_Ch_Class(cc, ctype); /* read first character */ 375 376_start_: /* cc/ctype: current char, pligne: ptr to next char */ 377 switch (ctype) 378 { 379 case RE: 380 tok = EOI; 381 break; 382 383 case BS: 384 case NL: 385 case CM: 386 tok = _skip_blanks(nst, sd, &pligne, &cc, &ctype); 387 if (LexError(tok)) 388 break; 389 token->pos.line = StreamLine(nst); 390 token->pos.from = PreviousOffset(nst,pligne,cc); 391 goto _start_; /* tok maybe BLANK_SPACE */ 392 393 case CM1: 394 tok = _skip_blanks(nst, sd, &pligne, &cc, &ctype); 395 if (LexError(tok)) 396 break; 397 else if (tok == BLANK_SPACE) { 398 token->pos.line = StreamLine(nst); 399 token->pos.from = PreviousOffset(nst,pligne,cc); 400 goto _start_; /* it was really a comment */ 401 } else 402 goto _symbol_; /* was no comment, treat CM1 as symbol */ 403 404 case AQ: 405 tok = QIDENTIFIER; 406 goto _quote_; 407 case SQ: 408 tok = STRING; 409 goto _quote_; 410 case CQ: 411 tok = CHARS; 412 goto _quote_; 413 case LQ: 414 tok = CODES; 415_quote_: 416 quote_char = cc; 417 pw = StreamLexAux(nst); 418 stop = pw + StreamLexSize(nst); 419 Get_Ch_Class(cc, ctype); 420 for(;;) 421 { 422 int base, iresult, max_no, max_lc, max_uc; 423 424 switch (ctype) 425 { 426 case RE: 427 Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst)); 428 tok = ENDOFFILE; 429 goto _return_tok_; 430 431 case AQ: 432 case LQ: 433 case SQ: 434 case CQ: 435 if (cc != quote_char) { 436 *pw++ = cc; 437 break; /* other quote within */ 438 } 439 if (sd->options & DOUBLED_QUOTE_IS_QUOTE) 440 { 441 Get_Chr(cc); 442 if (cc == quote_char) 443 { 444 *pw++ = cc; 445 break; 446 } 447 Backup_(cc, 1); 448 } 449 /* check for consecutive strings */ 450 if ((ctype != AQ) && !(sd->options & ISO_RESTRICTIONS)) 451 { 452 Get_Ch_Class(cc,ctype); 453 if (LexError(_skip_blanks(nst, sd, &pligne, &cc, &ctype))) { 454 Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst)); 455 tok = ENDOFFILE; 456 goto _return_tok_; 457 } 458 if (cc == quote_char) 459 break; /* skip doubled quote */ 460 Backup_(cc, 1); 461 } 462 *pw = 0; /* end of quoted item */ 463 Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst)); 464 goto _return_tok_; 465 466 case ES: /* escape character: interpret next */ 467 Get_Ch_Class(cc,ctype); 468 switch (ctype) 469 { 470 case RE: 471 Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst)); 472 tok = ENDOFFILE; 473 goto _return_tok_; 474 475 case BS: /* ignore escaped line end \r\n */ 476 if (cc != '\r') 477 goto _return_ill_quoted_; 478 Get_Ch_Class(cc, ctype); /* next character */ 479 if (ctype != NL) 480 goto _return_ill_quoted_; 481 /* fall through */ 482 483 case NL: /* ignore escaped line end \n */ 484 StreamLine(nst)++; 485 /* don't call Get_Ch_Class_And_Complete() because 486 * nothing was put in the lex_aux buffer */ 487 Get_Ch_Class(cc, ctype); /* next character */ 488 continue; 489 490 case ES: 491 case AQ: 492 case LQ: 493 case CQ: 494 case SQ: 495 *pw++ = cc; /* just take cc as it is */ 496 break; 497 498 case LC: 499 switch (cc) 500 { 501 case 'a': *pw++ = 0007; break; /* alert */ 502 case 'b': *pw++ = '\b'; break; /* backspace */ 503 case 't': *pw++ = '\t'; break; /* tab */ 504 case 'n': *pw++ = '\n'; break; /* newline */ 505 case 'v': *pw++ = 0013; break; /* vertical tab */ 506 case 'f': *pw++ = '\f'; break; /* form feed */ 507 case 'r': *pw++ = '\r'; break; /* return */ 508 509 case 'e': /* escape */ 510 if (sd->options & ISO_ESCAPES) goto _return_ill_quoted_; 511 *pw++ = 0033; break; 512 513 case 'd': /* delete */ 514 if (sd->options & ISO_ESCAPES) goto _return_ill_quoted_; 515 *pw++ = 0177; break; 516 517 case 's': /* space */ 518 if (sd->options & ISO_ESCAPES) goto _return_ill_quoted_; 519 *pw++ = ' '; break; 520 521 case 'c': /* Quintus/Sicstus feature */ 522 if (sd->options & ISO_ESCAPES) goto _return_ill_quoted_; 523 do { 524 Get_Ch_Class(cc, ctype); 525 } while (ctype == BS || ctype == NL); 526 continue; 527 528 case 'x': 529 base = 16; max_no = '9'; max_lc = 'f'; max_uc = 'F'; 530 Get_Ch_Class(cc, ctype) 531 if (ctype == ES) 532 goto _return_ill_quoted_; 533_iso_numeric_escape_: 534 for (iresult=0;;) { 535 if (cc>='0' && cc<=max_no) cc -= '0'; 536 else if (cc>='a' && cc<=max_lc) cc = cc - 'a' + 10; 537 else if (cc>='A' && cc<=max_uc) cc = cc - 'A' + 10; 538 else if (ctype == ES) 539 break; 540 else 541 goto _return_ill_quoted_; 542 if ((unsigned )iresult <= MAX_CHAR_CODE/base && 543 ((unsigned ) (iresult * base) <= MAX_CHAR_CODE - cc)) 544 iresult = iresult * base + cc; 545 else goto _return_ill_quoted_; /* overflow */ 546 Get_Ch_Class(cc, ctype) 547 } 548 *pw++ = iresult; 549 break; 550 551 default: goto _return_ill_quoted_; 552 } 553 break; 554 555 case N: 556 if (sd->options & ISO_ESCAPES) 557 { 558 /* variable length, require terminating \ */ 559 base = 8; max_no = '7'; max_lc = 0; max_uc = 0; 560 goto _iso_numeric_escape_; 561 } 562 switch (cc) 563 { 564 case '0': 565 case '1': 566 case '2': 567 case '3': /* check for 3 octal digits */ 568 { 569 int val = cc - '0'; 570 Get_Chr(cc); /* second */ 571 if (!octal(cc)) { 572 goto _return_ill_quoted_; 573 } 574 val = (val << 3) + (cc - '0'); 575 Get_Chr(cc); /* third */ 576 if (!octal(cc)) { 577 goto _return_ill_quoted_; 578 } 579 val = (val << 3) + (cc - '0'); 580 *pw++ = val; 581 break; 582 } 583 default: goto _return_ill_quoted_; 584 } 585 break; 586 587 default: 588 goto _return_ill_quoted_; 589 } 590 break; 591 592 case BS: 593 /* ISO does not allow tabs etc in quoted items */ 594 if (cc != ' ' && (sd->options & ISO_ESCAPES)) 595 goto _return_ill_quoted_; 596 *pw++ = cc; 597 break; 598 599 case NL: 600 StreamLine(nst)++; 601 if (!(sd->options & NEWLINE_IN_QUOTES)) 602 goto _return_ill_quoted_; 603 /* fall through */ 604 605 default: 606 *pw++ = cc; 607 break; 608 } 609 Get_Ch_Class_And_Complete(cc, ctype); /* next character */ 610 if (pw == stop) { 611 Extend_Lex_Aux(nst, pw, stop); 612 } 613 } /* end for(;;) */ 614 615 616 case UC: /* uppercase */ 617 tok = REFERENCE; 618 goto _name_; 619 case UL: /* special prefix for variables */ 620 tok = UREFERENCE; 621 goto _name_; 622 case LC: /* a lower case symbol */ 623 tok = IDENTIFIER; 624_name_: 625 pw = StreamLexAux(nst); 626 stop = pw + StreamLexSize(nst); 627 *pw++ = cc; 628 Get_Ch_Class(cc, ctype); 629 for(;;) 630 { 631 if (Alphanum(ctype)) 632 { 633 if (pw == stop) { 634 Extend_Lex_Aux(nst, pw, stop); 635 } 636 *pw++ = cc; 637 Get_Ch_Class_And_Complete(cc, ctype); 638 } else { 639 break; 640 } 641 } 642 Backup_(cc, 1); 643 *pw = 0; 644 Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst)); 645 if (tok == UREFERENCE && (pw - StreamLexAux(nst)) > 1) 646 tok = REFERENCE; 647 break; 648 649 650 case SY: 651 case ES: 652 case CM2: 653_symbol_: 654 pw = StreamLexAux(nst); 655 stop = pw + StreamLexSize(nst); 656 *pw++ = cc; 657 Get_Ch_Class(cc, ctype); 658 for(;;) 659 { 660 if (Symbol(ctype) && ctype != RE) 661 { 662 if (pw == stop) { 663 Extend_Lex_Aux(nst, pw, stop); 664 } 665 *pw++ = cc; 666 Get_Ch_Class_And_Complete(cc, ctype); 667 } else { 668 break; 669 } 670 } 671#ifdef ISO_FULLSTOP 672 Backup_(cc, 1); 673 if ((pw - StreamLexAux(nst)) == 1 && *StreamLexAux(nst) == '.' 674 && (ctype == BS || ctype == NL || ctype == RE || ctype == CM)) 675 { 676 Make_Atom(&token->term, d_.eocl); 677 tok = EOCL; /* full stop */ 678 } else { 679 *pw = 0; 680 Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst)); 681 tok = IDENTIFIER; 682 } 683#else 684 if ((pw - StreamLexAux(nst)) == 1 && *StreamLexAux(nst) == '.' 685 && (ctype == BS || ctype == NL || ctype == RE || ctype == CM)) 686 { 687 if (ctype == RE || ctype == CM) 688 { 689 Backup_(cc, 1); 690 } 691 else if (ctype == NL) 692 { 693 StreamLine(nst)++; 694 } 695 Make_Atom(&token->term, d_.eocl); 696 tok = EOCL; /* full stop */ 697 } else { 698 Backup_(cc, 1); 699 *pw = 0; 700 Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst)); 701 tok = IDENTIFIER; 702 } 703#endif 704 break; 705 706 707 case AS: /* ascii-quote */ 708 Get_Ch_Class(cc,ctype); 709 if (ctype == RE) 710 goto _start_; 711 if (ctype == NL) 712 StreamLine(nst)++; 713 Make_Integer(&token->term, cc); 714 tok = NUMBER; 715 break; 716 717 718 case N: 719 pligne = string_to_number((char *) pligne - 1, &token->term, nst, sd); 720 tok = token->term.tag.kernel == TEND ? BAD_NUMERIC_CONSTANT : 721 tok == BLANK_SPACE ? SPACE_NUMBER : 722 NUMBER; 723 goto _return_tok_; 724 725 726 case DS: 727 switch(cc) 728 { 729 case '(': 730 case '[': 731 case '{': 732 Make_Integer(&token->term, cc); 733 tok = (tok == BLANK_SPACE) ? SPACE_SOLO : SOLO; 734 break; 735 736 case ',': 737 Make_Atom(&token->term, d_comma0_); 738 tok = COMMA; 739 break; 740 741 case '|': 742 StreamLexAux(nst)[0] = cc; 743 StreamLexAux(nst)[1] = 0; 744 Set_TokenString(StreamLexAux(nst), 1); 745 tok = BAR; 746 break; 747 748 case ')': 749 case ']': 750 case '}': 751 Make_Integer(&token->term, cc); 752 tok = CLOSING_SOLO; 753 break; 754 755 default: 756 Make_Integer(&token->term, cc); 757 tok = SOLO; 758 break; 759 } 760 break; 761 762 763 case TS: /* terminator character (non-Prolog extension) */ 764 Make_Atom(&token->term, d_.eocl); 765 tok = EOCL; /* full stop */ 766 break; 767 768 case SL: /* like SY, but every character is its own token */ 769 StreamLexAux(nst)[0] = cc; 770 StreamLexAux(nst)[1] = 0; 771 Set_TokenString(StreamLexAux(nst), 1); 772 tok = IDENTIFIER; 773 break; 774 775 776 default: 777 Make_Integer(&token->term, cc); 778 tok = SOLO; 779 break; 780 781 } /* end switch */ 782 783_return_tok_: 784 StreamPtr(nst) = pligne; 785 token->pos.to = CurrentOffset(nst,pligne); 786 token->class = tok; 787 return tok; 788 789_return_ill_quoted_: 790 Set_TokenString(StreamLexAux(nst), pw - StreamLexAux(nst)); 791 tok = ILL_QUOTED; 792 goto _return_tok_; 793} 794 795 796/* 797 * Return the next non-blank and non-comment character. 798 * pligne, cc, ctype are maintained as in lex_an() 799 * 800 * Return values: 801 * NO_TOKEN no blank space was skipped 802 * BLANK_SPACE some blank space was skipped 803 * ENDOFFILE error (cc,ctype not updated) 804 */ 805static int 806_skip_blanks(stream_id nst, syntax_desc *sd, unsigned char **p_pligne, int *p_cc, int *p_ctype) 807{ 808 unsigned char *pligne = *p_pligne; 809 int ret = NO_TOKEN; 810 int cc = *p_cc; 811 int ctype = *p_ctype; 812 int cc2, ctype2, depth; 813 814 for(;;) 815 { 816 switch (ctype) 817 { 818 case NL: 819 StreamLine(nst)++; 820 /* fall through */ 821 case BS: 822 ret = BLANK_SPACE; 823 break; 824 825 case CM: /* comment until end of line */ 826 ret = BLANK_SPACE; 827 do { 828 Get_Ch_Class(cc,ctype); 829 } while (ctype != NL && ctype != RE); 830 continue; 831 832 case CM1: /* C-style comment */ 833 Get_Ch_Class(cc2,ctype2); /* lookahead */ 834 if (ctype2 == CM2) 835 { 836 ret = BLANK_SPACE; /* it's definitely a comment */ 837 Get_Ch_Class(cc,ctype); 838 for (depth = 1; depth > 0; ) 839 { 840 switch (ctype) 841 { 842 case RE: /* EOF within comment not allowed */ 843 *p_pligne = pligne; 844 return ENDOFFILE; 845 846 case NL: /* don't forget to count lines */ 847 StreamLine(nst)++; 848 break; 849 850 case CM1: /* possible nested comment */ 851 if (sd->options & NESTED_COMMENTS) 852 { 853 Get_Ch_Class(cc,ctype); 854 if (ctype != CM2) 855 continue; 856 depth++; 857 } 858 break; 859 860 case CM2: /* possible end of comment */ 861 Get_Ch_Class(cc,ctype); 862 if (ctype != CM1) 863 continue; 864 depth--; 865 break; 866 } 867 Get_Ch_Class(cc,ctype); 868 } 869 continue; /* end of comment */ 870 } 871 Backup_(cc2, 1); 872 /* no comment, fall through */ 873 874 default: 875 *p_cc = cc; 876 *p_ctype = ctype; 877 *p_pligne = pligne; 878 return ret; 879 } 880 Get_Ch_Class(cc,ctype); 881 } 882} 883 884 885/* 886 * Check if an atom needs to be quoted. This is called from the 887 * write_atom() routine. Return values: 888 * IDENTIFIER - no quotes needed 889 * QIDENTIFIER - quotes needed 890 * BAR - may need quotes (all arities) 891 * EOCL - dot may need quotes (all arities) 892 * COMMA - may need quotes (only returned for ,/2) 893 */ 894int 895ec_need_quotes(dident d, syntax_desc *sd) 896{ 897 register unsigned char *name = (unsigned char *) DidName(d); 898 register int rest = (int) DidLength(d); 899 register int c; 900 901 if (rest-- == 0) 902 return QIDENTIFIER; 903 904 switch (sd->char_class[c = *name++]) 905 { 906 case LC: /* atoms starting with lower case */ 907 while (rest--) 908 { 909 c = *name++; 910 if (!Alphanum(sd->char_class[c])) 911 return QIDENTIFIER; 912 } 913 return IDENTIFIER; 914 915 case SY: /* symbol atoms: . may need quotes */ 916 if (c == '.' && rest == 0) 917 return EOCL; 918 /* else fall through */ 919 case ES: 920 case CM2: 921 while (rest--) 922 { 923 c = *name++; 924_need_quotes1_: 925 switch (sd->char_class[c]) 926 { 927 case CM1: 928 case CM2: 929 case ES: 930 case SY: 931 break; 932 default: 933 return QIDENTIFIER; 934 } 935 } 936 return IDENTIFIER; 937 938 case CM1: /* begin of comment must be quoted */ 939 if (rest--) 940 { 941 c = *name++; 942 if (sd->char_class[c] == CM2) 943 return QIDENTIFIER; 944 else 945 goto _need_quotes1_; 946 } 947 else return IDENTIFIER; 948 949 case DS: 950 switch (c) 951 { 952 case '{': /* {} needs no quotes */ 953 if (rest == 1 && *name == '}') 954 return IDENTIFIER; 955 else return QIDENTIFIER; 956 case '[': /* [] needs no quotes */ 957 if (rest == 1 && *name == ']') 958 return IDENTIFIER; 959 else return QIDENTIFIER; 960 case ',': /* ,/2 sometimes needs no quotes */ 961 if (d == d_.comma) 962 return COMMA; 963 else return QIDENTIFIER; 964 case '|': /* | needs quotes only inside lists */ 965 if (rest == 0) 966 return BAR; 967 else return QIDENTIFIER; 968 default: 969 return QIDENTIFIER; 970 } 971 972 case SL: 973 if (rest == 0) 974 return IDENTIFIER; /* ! and ; don't need quotes */ 975 else return QIDENTIFIER; 976 977 case TS: 978 return QIDENTIFIER; 979 } 980 return QIDENTIFIER; 981} 982 983 984static unsigned char * 985_extend_lex_aux(stream_id nst) 986{ 987 register long n = StreamLexSize(nst); 988 989 StreamLexAux(nst) = (unsigned char *) hg_resize((generic_ptr) (StreamLexAux(nst)), (int)(n + n)); 990 StreamLexSize(nst) = n + n; 991 return StreamLexAux(nst) + n; 992} 993 994#ifdef FUNCTOR_COMPLETION 995#if defined(HAVE_READLINE) 996/*ARGSUSED*/ 997static char ** 998_complete_predicate(char *text, int start, int end) 999{ 1000 char **matches; 1001 register char *s; 1002 int i = 0; 1003 extern char **completion_matches(); 1004 extern char *rl_line_buffer; 1005 1006 matches = (char **)NULL; 1007 1008 /* strip spaces so that we know if we match a predicate or not */ 1009 for (s = rl_line_buffer; (*s == ' ' || *s == '\t'); s++) 1010 i++; 1011 if (i == start) 1012 completion_start = 0; 1013 else { 1014 completion_start = start; 1015 for (s = rl_line_buffer + start - 1; s >= rl_line_buffer; s--) { 1016 if (*s == '[') 1017 return matches; /* matching filenames */ 1018 else if (*s != ' ' && *s != '\t' && *s != '\'' && *s != '"') 1019 break; 1020 } 1021 } 1022 matches = completion_matches(text, _find_matching_predicate); 1023 return matches; 1024} 1025 1026static char * 1027_find_matching_predicate(char *string, int state) 1028{ 1029 char *s1, *s2; 1030 extern char *sprintf(); 1031 int search_arity; 1032 int length; 1033 1034 if (state == 0) { 1035 completion_idx = 0; 1036 completion_length = strlen(string); 1037 } 1038 if (string[completion_length - 1] == '/') { 1039 /* We have the whole name and search for arity only */ 1040 search_arity = 1; 1041 length = completion_length - 1; 1042 } else { 1043 search_arity = 0; 1044 length = completion_length; 1045 } 1046 while (next_functor(&completion_idx, &completion_dip)) 1047 { 1048 if (s1 = DidName(completion_dip)) 1049 { 1050 if (strncmp(string, s1, length) == 0 && 1051 ((!search_arity && completion_start) || 1052 visible_procedure(completion_dip, 1053 d_.default_module, tdict, PRI_DONTIMPORT))) 1054 { 1055 if (search_arity) { 1056 if (strlen(s1) == length) { 1057 s2 = (char *) hp_alloc(length + 4); 1058 (void) strcpy(s2, s1); 1059 s2[length] = '/'; 1060 (void) sprintf(s2 + length + 1, 1061 "%d", DidArity(completion_dip)); 1062 return s2; 1063 } 1064 } else { 1065 s2 = (char *) hp_alloc(strlen(s1) + 1); 1066 (void) strcpy(s2, s1); 1067 return s2; 1068 } 1069 } 1070 } 1071 } 1072 Set_Bip_Error(0); 1073 return (char *) 0; 1074} 1075#else 1076 1077static int 1078_prefix_length(char *s1, char *s2) 1079{ 1080 register char *p = s1; 1081 1082 while (*s1++ == *s2++) 1083 ; 1084 return (s1 - p) - 1; 1085} 1086 1087static int 1088_complete(unsigned char *string, unsigned char *end, char **out) 1089{ 1090 int length = 0; 1091 register unsigned char *s1; 1092 register unsigned char *s2; 1093 char *found_string; 1094 int match; 1095 int idx = 0; 1096 dident dip; 1097 1098 while (next_functor(&idx, &dip)) 1099 { 1100 if (s1 = (unsigned char *) DidName(dip)) 1101 { 1102 s2 = string; 1103 while (s2 < end) 1104 if (*s2 != *s1++) 1105 break; 1106 else 1107 s2++; 1108 if (s2 == end) /* Found one */ 1109 { 1110 if (!length) 1111 { 1112 found_string = DidName(dip); 1113 length = strlen(found_string); 1114 } 1115 else if (DidName(dip) != found_string) 1116 { 1117 match = _prefix_length(DidName(dip), found_string); 1118 if (match < length) 1119 length = match; 1120 } 1121 } 1122 } 1123 } 1124 if (length > end - string) { 1125 *out = found_string + (end - string); 1126 return length - (end - string); 1127 } 1128 else 1129 return 0; 1130} 1131 1132/*ARGSUSED*/ 1133static void 1134_find_matching_atom( 1135 unsigned char *end, 1136 stream_id nst, 1137 unsigned char **pw, 1138 unsigned char **stop) 1139{ 1140 char *p; 1141 int sl; 1142 1143 if (sl = _complete(StreamLexAux(nst), end, &p)) 1144 { 1145#ifdef HAVE_PUSHBACK 1146 while (sl--) 1147 pushback_char((int) (StreamUnit(nst)), p++); 1148#else 1149 (void) write((int) (StreamUnit(nst)), p, sl); 1150 if (*pw + sl >= *stop) { 1151 Extend_Lex_Aux(nst, *pw, *stop); 1152 } 1153 while (sl--) 1154 *(*pw)++ = *p++; 1155#endif 1156 } 1157 else 1158 (void) write((int) (StreamUnit(nst)), "\007", 1); 1159 StreamMode(nst) |= DONT_PROMPT; 1160} 1161#endif 1162#endif 1163 1164 1165syntax_desc * 1166copy_syntax_desc(syntax_desc *sd) 1167{ 1168 syntax_desc *newsd; 1169 1170 newsd = (syntax_desc *) hg_alloc_size(sizeof(syntax_desc)); 1171 *newsd = *sd; 1172 return newsd; 1173} 1174 1175/*ARGSUSED*/ 1176static int 1177p_copy_syntax(value vfrom, type tfrom, value vto, type tto) 1178{ 1179 module_item *from, *to; 1180 1181 from = ModuleItem(vfrom.did); 1182 to = ModuleItem(vto.did); 1183 1184 hg_free_size((generic_ptr) to->syntax, sizeof(syntax_desc)); 1185 to->syntax = copy_syntax_desc(from->syntax); 1186 Succeed_; 1187} 1188 1189/* 1190 * get_chtab_(+Character, ?CharacterClass, Module) 1191 */ 1192static int 1193p_get_chtab(value v1, type t1, value v2, type t2, value vm, type tm) 1194{ 1195 Check_Integer(t1); 1196 Check_Output_Atom(t2); 1197 Check_Module_And_Access(vm, tm) 1198 if (v1.nint < 0 || v1.nint > 255) 1199 { 1200 Bip_Error(RANGE_ERROR); 1201 } 1202 Return_Unify_Atom(v2,t2,(dident)chname_[ModuleSyntax(vm.did)->char_class[(unsigned char)v1.nint]]); 1203} 1204 1205/* 1206 * set_chtab_(+Character, +CharacterClass, Module) 1207 */ 1208static int 1209p_set_chtab(value v1, type t1, value v2, type t2, value vm, type tm) 1210{ 1211 unsigned char c; /* to hold the concerned character */ 1212 int new_cc; 1213 syntax_desc *sd; 1214 1215 Check_Integer(t1); 1216 Check_Atom(t2); 1217 Check_Module_And_Access(vm, tm) 1218 if (v1.nint < 0 || v1.nint > 255) 1219 { 1220 Bip_Error(RANGE_ERROR); 1221 } 1222 c = (unsigned char) v1.nint; 1223 sd = ModuleSyntax(vm.did); 1224 1225 /* Then try to find the character class among the known ones */ 1226 for(new_cc = 1; new_cc <= NBCH && v2.did != chname_[new_cc]; new_cc++) 1227 ; 1228 if (new_cc > NBCH) { Bip_Error(RANGE_ERROR) } /* Not found */ 1229 1230 /* Check if we are redefining the current AQ, SQ or ES character. 1231 * For writing, we always need an AQ and SQ, hence they may only be 1232 * redefined if there is an alternative one that can be used instead. 1233 * Having no ES character is allowed. 1234 */ 1235 if ((unsigned char)new_cc != sd->char_class[c] 1236 && (sd->current_sq_char == c || sd->current_aq_char == c || 1237 sd->current_escape == c || sd->current_ul_char == c)) 1238 { int j; 1239 unsigned char cc = sd->char_class[c]; 1240 1241 for(j = 0; j <= 255; j++) /* scan through all characters */ 1242 { 1243 if (sd->char_class[j] == cc && (int)c != j) 1244 { 1245 switch(cc) /* found an alternative character j */ 1246 { 1247 case AQ: sd->current_aq_char = j; break; 1248 case SQ: sd->current_sq_char = j; break; 1249 case UL: sd->current_ul_char = j; break; 1250 case ES: sd->current_escape = j; break; 1251 } 1252 break; 1253 } 1254 } 1255 if (j > 255) 1256 if (cc == ES) 1257 sd->current_escape = -1; /* no longer an ES character */ 1258 else 1259 { 1260 Bip_Error(ONE_SQ_AQ) /* these quotes are needed */ 1261 } 1262 } 1263 1264 sd->char_class[c] = (unsigned char) new_cc; /* now redefine the character */ 1265 switch(new_cc) /* might be the new current_... */ 1266 { 1267 case AQ: sd->current_aq_char = c; break; 1268 case SQ: sd->current_sq_char = c; break; 1269 case ES: sd->current_escape = c; break; 1270 case UL: sd->current_ul_char = c; break; 1271 } 1272 Succeed_; 1273} 1274 1275 1276/* 1277 * get_syntax_(?Flag, Remember, Module) - backtrack over syntax flags 1278 * 1279 * internal use only ! 1280 */ 1281/*ARGSUSED*/ 1282static int 1283p_get_syntax(value val1, type tag1, value val2, type tag2, value vm, type tm) 1284{ 1285 value vi; 1286 int syntax; 1287 1288 /* no check on tag1 ! */ 1289 /* Check_Integer(tag2); not needed */ 1290 1291 syntax = ModuleSyntax(vm.did)->options; 1292 vi.nint = val2.nint; 1293 while (vi.nint < SYNTAX_FLAGS) 1294 { 1295 if (syntax & (1 << vi.nint++)) 1296 { 1297 Remember(2, vi, tag2); 1298 Return_Unify_Atom(val1, tag1, syntax_flags[vi.nint-1]); 1299 } 1300 } 1301 Cut_External; 1302 Fail_; 1303} 1304 1305/* read_token_(Stream, Token, Class, Module) */ 1306/*ARGSUSED*/ 1307static int 1308p_read_token_(value vs, type ts, value v, type t, value vc, type tc, value vm, type tm) 1309{ 1310 int res; 1311 char *s; 1312 token_desc token; 1313 stream_id nst = get_stream_id(vs,ts, SREAD, &res); 1314 register word len; 1315 syntax_desc *sd = ModuleSyntax(vm.did); 1316 dident tname; 1317 Prepare_Requests; 1318 1319 if (!IsRef(t) && IsCompound(t)) 1320 { 1321 Bip_Error(TYPE_ERROR) 1322 } 1323 if (nst == NO_STREAM) 1324 { 1325 Bip_Error(res) 1326 } 1327 if (!IsReadStream(nst)) 1328 { 1329 Bip_Error(STREAM_MODE); 1330 } 1331 Check_Module_And_Access(vm, tm) 1332 if (StreamMode(nst) & REPROMPT_ONLY) 1333 StreamMode(nst) |= DONT_PROMPT; 1334 1335 (void) lex_an(nst, sd, &token); 1336 tname = LexError(token.class) ? d_.err : tname_[token.class]; 1337 switch(token.class) 1338 { 1339 case COMMA: 1340 case EOCL: 1341 break; 1342 1343 case REFERENCE: 1344 case UREFERENCE: 1345 case STRING: 1346 case CODES: 1347 case CHARS: 1348 case BAR: 1349 default: /* LexError() */ 1350 len = token.term.val.nint; 1351 Make_Stack_String(len, token.term.val, s) 1352 Copy_Bytes(s, token.string, len + 1); 1353 token.term.tag.kernel = TSTRG; 1354 break; 1355 1356 case ENDOFFILE: /* we don't have the string */ 1357 Make_Stack_String(0, token.term.val, s) 1358 *s = 0; 1359 token.term.tag.kernel = TSTRG; 1360 break; 1361 1362 case SOLO: 1363 case CLOSING_SOLO: 1364 case SPACE_SOLO: 1365 { 1366 char c = (char) token.term.val.nint; 1367 Make_Stack_String(1, token.term.val, s) 1368 s[0] = c; 1369 s[1] = 0; 1370 token.term.tag.kernel = TSTRG; 1371 break; 1372 } 1373 1374 case NUMBER: 1375 case SPACE_NUMBER: 1376 if (IsInterval(token.term.tag)) 1377 { 1378 Unmark_Interval_Raw(token.term.val.ptr); 1379 } 1380 tname = tag_desc[tag_desc[TagType(token.term.tag)].super].type_name; 1381 break; 1382 1383 case IDENTIFIER: 1384 case QIDENTIFIER: 1385 token.term.val.did = enter_dict_n(token.string, token.term.val.nint, 0); 1386 token.term.tag.kernel = token.term.val.did == d_.nil ? TNIL : TDICT; 1387 break; 1388 1389 case EOI: 1390 if (StreamMode(nst) & MEOF ) { 1391 Bip_Error(IsSoftEofStream(nst) ? PEOF : READ_PAST_EOF); 1392 } 1393 else 1394 StreamMode(nst) |= MEOF; 1395 Bip_Error(PEOF); 1396 } 1397 Request_Unify_Pw(v, t, token.term.val, token.term.tag); 1398 Request_Unify_Atom(vc, tc, tname); 1399 Return_Unify; 1400} 1401 1402 1403/*** the subsequent BIPs fail on error and set the global variable ***/ 1404 1405#undef Bip_Error 1406#define Bip_Error(N) Bip_Error_Fail(N) 1407 1408/* 1409 * set_syntax(+flag, +val) - set or reset a syntax flag, fails on error 1410 * 1411 * internal use only ! 1412 */ 1413/*ARGSUSED*/ 1414static int 1415p_set_syntax(value val1, type tag1, value val2, type tag2, value vm, type tm) 1416{ 1417 int i, flag; 1418 syntax_desc *sd; 1419 1420 Check_Atom(tag1); 1421 Check_Atom(tag2); 1422 sd = ModuleSyntax(vm.did); 1423 1424 for (i=0, flag=1; i < SYNTAX_FLAGS; i++, flag <<= 1) 1425 { 1426 if (val1.did == syntax_flags[i]) 1427 { 1428 if (val2.did == d_.on) 1429 sd->options |= flag; 1430 else if (val2.did == d_.off) 1431 sd->options &= ~flag; 1432 else { Bip_Error(RANGE_ERROR); } 1433 Succeed_; 1434 } 1435 } 1436 Bip_Error(RANGE_ERROR); 1437} 1438 1439 1440#ifdef HAVE_INFINITY 1441extern double infinity(); 1442#else 1443#ifdef HUGE_VAL 1444#define infinity() HUGE_VAL 1445#else 1446#ifdef HUGE 1447#define infinity() HUGE 1448#else 1449#define infinity() 1.0e310 1450#endif 1451#endif 1452#endif 1453 1454/* 1455 * char *string_to_number(start, result, nst, sd) 1456 * 1457 * Auxiliary function used to convert a string (pointed to by start) 1458 * to a number. The result is a prolog word in *result 1459 * and is either a TINT, TBIG, TRAT, TDBL or TIVL. 1460 * If the tag is TEND there has been a conversion error. 1461 * The return value is the pointer to the next character after 1462 * the number. 1463 * The function can be used both for parsing from a stream (nst) 1464 * or for parsing a string (when nst == NULL). 1465 * StreamPtr is updated according to the return value. 1466 * This function is independent of character classes, except 1467 * for the escape sequences. 1468 * For better backward compatibility, based integers are 1469 * not parsed as bignums (otherwise 16'ffffffff would be a bignum), 1470 * unless the based_bignums syntax option is active. 1471 */ 1472 1473#define Init_S2N() \ 1474 if (nst) { \ 1475 aux = StreamLexAux(nst); \ 1476 stop = StreamLexAux(nst) + StreamLexSize(nst); \ 1477 } 1478 1479#define Reset_Start() \ 1480 if (nst) aux = StreamLexAux(nst); \ 1481 else start = (char *) t; 1482 1483/* up to three characters of backup are needed, e.g. in "3e+a" */ 1484#define Push_Back() \ 1485 --t; \ 1486 if (nst) { \ 1487 *(--aux) = 0; \ 1488 } 1489 1490#define Get_Ch(c) \ 1491 c = *t++; \ 1492 if (nst) { \ 1493 if (!c) { \ 1494 StreamPtr(nst) = t-1; \ 1495 (void) fill_buffer(nst); \ 1496 t = StreamPtr(nst); \ 1497 c = *t++; \ 1498 } \ 1499 if (aux == stop) { Extend_Lex_Aux(nst, aux, stop) } \ 1500 *aux++ = c; \ 1501 } 1502 1503#define NEG 1 1504#define BIG 2 1505#define FLOAT 4 1506#define IVL 8 1507#define PRECISE 16 1508 1509char * 1510string_to_number(char *start, pword *result, stream_id nst, syntax_desc *sd) 1511{ 1512 unsigned register char *t; /* next character to read */ 1513 unsigned register char *aux; /* next location in LexAux */ 1514 unsigned char *stop; /* end of LexAux */ 1515 register int c; /* current character */ 1516 int flags = 0; /* to remember established facts */ 1517 int base = 10; /* radix for number reading */ 1518 register uword iresult = 0; /* accumulator for integer value */ 1519 double f, low_f; /* the float result */ 1520 int float_digits = 0; 1521 int syntax; 1522 1523 Init_S2N(); 1524 t = (unsigned char *) start; 1525 if (!sd) sd = default_syntax; 1526 syntax = sd->options; 1527 1528_start_: 1529 Get_Ch(c) 1530 switch(c) { /* check for optional sign */ 1531 case '-': 1532 flags |= NEG; 1533 /*fall through*/ 1534 case '+': 1535 Get_Ch(c) 1536 if (syntax & BLANK_AFTER_SIGN) { /* allow optional space? */ 1537 while (sd->char_class[c] == BS) { 1538 Get_Ch(c) 1539 } 1540 } 1541 } 1542 1543 if (!isdigit(c)) /* read digits */ 1544 goto return_err; /* can't happen in the lexer */ 1545 1546 do { 1547 ++float_digits; 1548 if (!(flags & BIG)) 1549 { 1550 c -= '0'; 1551 if (iresult <= MAX_S_WORD/10 && ((iresult *= 10) <= MAX_S_WORD - c)) 1552 iresult += c; 1553 else flags |= BIG; /* word overflow */ 1554 } 1555 Get_Ch(c) 1556 } while (isdigit(c)); 1557 1558 if (c == '\'') { /* based integer */ 1559 if ((flags & BIG) || iresult < 0 || iresult > 36 1560 || (iresult < 10 && float_digits > 1) || float_digits > 2) 1561 { 1562 goto return_int; 1563 } 1564 base = iresult; 1565 if (base == 0) /* character code */ 1566 { 1567 int max_no, max_lc, max_uc; 1568 Get_Ch(c); 1569 switch(sd->char_class[c]) { 1570 case ES: 1571 Get_Ch(c); 1572 switch(sd->char_class[c]) { 1573 case AQ: /* 0'\' */ 1574 case SQ: /* 0'\" or 0'\` */ 1575 case LQ: /* 0'\" */ 1576 case CQ: /* 0'\" */ 1577 case ES: /* 0'\\ */ 1578 break; 1579 1580 case LC: /* 0'\a ... 0'\v */ 1581 switch(c) { 1582 case 'a': c = 0007; goto _return_c_; /* alert */ 1583 case 'b': c = '\b'; goto _return_c_; /* backspace */ 1584 case 't': c = '\t'; goto _return_c_; /* tab */ 1585 case 'n': c = '\n'; goto _return_c_; /* newline */ 1586 case 'v': c = 0013; goto _return_c_; /* vertical tab */ 1587 case 'f': c = '\f'; goto _return_c_; /* form feed */ 1588 case 'r': c = '\r'; goto _return_c_; /* return */ 1589 case 'e': /* escape */ 1590 if (syntax & ISO_ESCAPES) goto return_int3; 1591 c = 0033; goto _return_c_; 1592 case 'd': /* delete */ 1593 if (syntax & ISO_ESCAPES) goto return_int3; 1594 c = 0177; goto _return_c_; 1595 case 's': /* space */ 1596 if (syntax & ISO_ESCAPES) goto return_int3; 1597 c = ' '; goto _return_c_; 1598 case 'x': /* ISO hex constant */ 1599 base = 16; max_no = '9'; max_lc = 'f'; max_uc = 'F'; 1600 Get_Ch(c) 1601 if (sd->char_class[c] != ES) 1602 goto _iso_numeric_escape_; 1603 Push_Back(); /* the premature ES */ 1604 break; 1605 } 1606 goto _unknown_escape_; 1607 1608 case N: 1609 if (!(syntax & ISO_ESCAPES)) goto _unknown_escape_; 1610 base = 8; max_no = '7'; max_lc = 0; max_uc = 0; 1611_iso_numeric_escape_: 1612 /* because of unlimited length of this sequence, we cannot push 1613 * it all back on error - we leave legal prefix consumed */ 1614 for (iresult=0;;) { 1615 if (c>='0' && c<=max_no) c -= '0'; 1616 else if (c>='a' && c<=max_lc) c = c - 'a' + 10; 1617 else if (c>='A' && c<=max_uc) c = c - 'A' + 10; 1618 else if (sd->char_class[c] == ES) 1619 break; 1620 else { 1621 Push_Back(); /* the bad char */ 1622 goto return_err; 1623 } 1624 if ((unsigned )iresult <= MAX_CHAR_CODE/base && 1625 ((unsigned ) (iresult * base) <= MAX_CHAR_CODE - c)) 1626 iresult = iresult * base + c; 1627 else goto return_err; /* overflow */ 1628 Get_Ch(c) 1629 } 1630 result->val.nint = (word) iresult; 1631 result->tag.kernel = TINT; 1632 goto return_ok; 1633 1634 default: /* unrecognised 0'\? escape sequence */ 1635_unknown_escape_: 1636 if (syntax & ISO_ESCAPES) goto return_int3; 1637 /* backward comp: allow plain 0'\ for backslash */ 1638 Push_Back(); /* the bad char */ 1639 c = '\\'; 1640 break; 1641 } 1642 break; 1643 1644 case BS: 1645 if (c == ' ') break; 1646 /*fall through*/ 1647 case NL: /* 0'<layout> not allowed in ISO */ 1648 if (syntax & ISO_ESCAPES) goto return_int2; /* (iresult) */ 1649 break; 1650 1651 case AQ: /* 0'' */ 1652 if (syntax & ISO_ESCAPES && syntax & DOUBLED_QUOTE_IS_QUOTE) { 1653 Get_Ch(c); 1654 if (sd->char_class[c] != AQ) goto return_int3; 1655 /* 0''' */ 1656 } 1657 break; 1658 1659 case SQ: /* 0'" or 0'` */ 1660 case LQ: /* 0'" */ 1661 case CQ: /* 0'" */ 1662 break; 1663 } 1664_return_c_: 1665 result->val.nint = (word) c; 1666 result->tag.kernel = TINT; 1667 goto return_ok; 1668 } 1669 if (syntax & ISO_BASE_PREFIX) 1670 goto return_int; /* (flags, iresult) */ 1671 1672_based_number_: /* (base,iresult) */ 1673 { 1674 int max_no = base < 10 ? '0'+base-1 : '9'; 1675 int max_lc = 'a' + base-11; 1676 int max_uc = 'A' + base-11; 1677 Reset_Start() 1678 Get_Ch(c) 1679 if (!(c>='0' && c<=max_no || c>='a' && c<=max_lc || c>='A' && c<=max_uc)) 1680 { 1681 Push_Back(); /* the bad digit */ 1682 goto return_int; /* (flags, iresult) */ 1683 } 1684 for (iresult=0;;) { 1685 if (c>='0' && c<=max_no) c -= '0'; 1686 else if (c>='a' && c<=max_lc) c = c - 'a' + 10; 1687 else if (c>='A' && c<=max_uc) c = c - 'A' + 10; 1688 else break; 1689 if (!(flags & BIG)) 1690 { 1691 if (iresult <= MAX_U_WORD/base && 1692 (iresult * base <= MAX_U_WORD - c)) 1693 iresult = iresult * base + c; 1694 else flags |= BIG; /* word overflow */ 1695 } 1696 Get_Ch(c) 1697 } 1698 if (syntax & BASED_BIGNUMS) 1699 { 1700 if (!(flags & BIG) && iresult > MAX_S_WORD) 1701 flags |= BIG; 1702 } 1703 else if (flags & BIG) { 1704 Push_Back(); /* the delimiter */ 1705 goto return_err; 1706 } 1707 goto return_int; 1708 } 1709 } 1710 else if(c == '.') /* could be a float */ 1711 { 1712 int first; 1713 Get_Ch(c) /* first after point */ 1714 if (!isdigit(c)) 1715 { 1716 Push_Back(); /* the non-digit */ 1717 goto return_int; /* it was no decimal point */ 1718 } 1719 ++float_digits; 1720 flags |= FLOAT; /* definitely a float */ 1721 first = c; 1722 Get_Ch(c) 1723 if (!isdigit(c)) { /* only one fractional digit */ 1724 if (first == '0' || first == '5') 1725 flags |= PRECISE; 1726 } else { 1727 do { 1728 ++float_digits; 1729 Get_Ch(c) /* read remaining digits */ 1730 } while (isdigit(c)); 1731 } 1732 if (c == 'e' || c == 'E') /* exponent is now optional */ 1733 flags &= ~PRECISE; /* conservative assumption */ 1734 else if (c == 'I') /* check for Inf */ 1735 { 1736 Get_Ch(c) 1737 if (c == 'n') 1738 { 1739 Get_Ch(c) 1740 if (c == 'f') goto return_infinity; 1741 Push_Back(); /* the f position*/ 1742 } 1743 Push_Back(); /* the n position*/ 1744 goto return_real; 1745 } 1746 else if (c == 'N') /* check for NaN */ 1747 { 1748 Get_Ch(c) 1749 if (c == 'a') 1750 { 1751 Get_Ch(c) 1752 if (c == 'N') goto return_nan; 1753 Push_Back(); /* the N position*/ 1754 } 1755 Push_Back(); /* the a position*/ 1756 goto return_real; 1757 } 1758 else /* no exponent */ 1759 goto return_real; 1760 /* go read exponent */ 1761 } 1762 else if ((c == 'e' || c == 'E') && !(syntax & FLOAT_NEEDS_POINT)) 1763 ; 1764 else if (c == '_') /* could be a rational */ 1765 { 1766 Get_Ch(c) 1767 if (!isdigit(c)) 1768 { 1769#ifdef ALT_RAT_SYNTAX 1770 if (c != '/') /* allow for an optional '/' */ 1771 { 1772 Push_Back(); /* the non-digit */ 1773 goto return_int; /* just an integer */ 1774 } 1775 Get_Ch(c) 1776#endif 1777 if (!isdigit(c)) 1778 { 1779 Push_Back(); /* the non-digit */ 1780#ifdef ALT_RAT_SYNTAX 1781 Push_Back(); /* the '/' */ 1782#endif 1783 goto return_int; /* just an integer */ 1784 } 1785 } 1786 do { /* definitely a rational */ 1787 Get_Ch(c) 1788 } while (isdigit(c)); 1789 goto return_rat; 1790 } 1791 else if (syntax&ISO_BASE_PREFIX && iresult==0 && float_digits==1) 1792 { 1793 switch (c) { 1794 case 'b': base = 2; goto _based_number_; /* (base,iresult) */ 1795 case 'o': base = 8; goto _based_number_; /* (base,iresult) */ 1796 case 'x': base = 16; goto _based_number_; /* (base,iresult) */ 1797 } 1798 goto return_int; /* integer or bignum */ 1799 } 1800 else 1801 goto return_int; /* integer or bignum */ 1802 1803 Get_Ch(c) /* read exponent */ 1804 if (c == '-' || c == '+') /* optional exponent sign */ 1805 { 1806 Get_Ch(c) 1807 if (!isdigit(c)) 1808 { 1809 Push_Back(); /* the non-digit */ 1810 Push_Back(); /* the sign */ 1811 if (flags & FLOAT) 1812 goto return_real; 1813 else 1814 goto return_int; 1815 } 1816 } 1817 else if (!isdigit(c)) /* one or more digits */ 1818 { 1819 Push_Back(); /* the non-digit */ 1820 if (flags & FLOAT) 1821 goto return_real; 1822 else 1823 goto return_int; 1824 } 1825 /* flags |= FLOAT; definitely a float */ 1826 do { 1827 Get_Ch(c) 1828 } while (isdigit(c)); 1829 1830return_real: /* we have a valid real */ 1831 Push_Back(); /* pushback the delimiter */ 1832 if (nst) start = (char *) StreamLexAux(nst); 1833 f = atof(start); 1834#ifdef ATOF_NEGZERO_BUG 1835 /* some versions of atof() don't properly create negative zeros */ 1836 if (f == 0.0 && 1.0/f > 0.0 && flags & NEG) f = -f; 1837#endif 1838 1839return_f: /* f */ 1840 1841 if (flags & IVL) /* second half of interval? */ 1842 { 1843 if (!GoodFloat(f) || !GoodFloat(low_f)) 1844 goto return_err; 1845 /* 1846 * When called from the lexer we allow to return an illformed (raw) 1847 * interval (lwb > upb) because we don't see a possibly leading 1848 * minus sign! 1849 */ 1850 if (!nst && low_f > f) 1851 goto return_err; 1852 Make_Interval(result, low_f, f); 1853 if (nst) 1854 { 1855 /* this flag is used in _ivl_chgsign() and 1856 * reset in the parser or in read_token */ 1857 Mark_Interval_Raw(result->val.ptr); 1858 } 1859 goto return_ok; 1860 } 1861 1862 Get_Ch(c) /* check for float interval separator */ 1863 if (c == '_') 1864 { 1865 Get_Ch(c) 1866 if (c == '_') 1867 { 1868 low_f = f; 1869 Reset_Start() 1870 flags = IVL; 1871 goto _start_; /* go read the second float */ 1872 } 1873 Push_Back(); /* the non-underscore */ 1874 } 1875 Push_Back(); 1876 1877 if (syntax & FLOATS_AS_BREALS) 1878 { 1879 if (!GoodFloat(f)) 1880 { 1881 goto return_err; 1882 } 1883 if (!(flags & PRECISE) || float_digits > 15) 1884 { 1885 low_f = ec_ieee_down(f); 1886 f = ec_ieee_up(f); 1887 } 1888 else 1889 { 1890 low_f = f; 1891 } 1892 Make_Interval(result, low_f, f); 1893 } 1894 else 1895 { 1896 Make_Double(result, f) 1897 } 1898 1899 goto return_ok; 1900 1901return_infinity: /* we have an infinity */ 1902 f = flags & NEG ? -infinity() : infinity(); 1903 goto return_f; 1904 1905return_rat: /* (start, base) */ 1906 Push_Back(); /* pushback the delimiter */ 1907 if (flags & IVL) goto return_err; 1908 if (nst) start = (char *) StreamLexAux(nst); 1909 if (tag_desc[TRAT].from_string(start, result, base) != PSUCCEED) 1910 goto return_err; 1911 goto return_ok; 1912 1913return_int3: 1914 Push_Back(); 1915return_int2: 1916 Push_Back(); 1917return_int: /* (flags, iresult, start, base) */ 1918 Push_Back(); /* pushback the delimiter */ 1919 if (flags & IVL) goto return_err; 1920 if (flags & BIG) 1921 { 1922 if (nst) start = (char *) StreamLexAux(nst); 1923 if (tag_desc[TBIG].from_string(start, result, base) != PSUCCEED) 1924 goto return_err; 1925 } 1926 else /* integer */ 1927 { 1928 result->val.nint = flags & NEG ? (word)-iresult : (word)iresult; 1929 result->tag.kernel = TINT; 1930 } 1931 1932return_ok: 1933 if (nst) StreamPtr(nst) = t; 1934 return (char *) t; 1935 1936return_nan: /* we have a NaN */ 1937 { 1938 ieee_double nan; 1939 if (nst) start = (char *) StreamLexAux(nst); 1940 nan.as_dbl = atof(start); 1941 nan.as_struct.mant1 |= 0x7FF00000; /* change it into a NaN */ 1942 /* 1943 * Note that signaling NaNs are immediately turned into quiet NaNs 1944 * here, usually by setting the top bit in the significand. 1945 * E.g. 1.2NaN turns into 1.7NaN, nothing we can do about that. 1946 */ 1947 f = nan.as_dbl; 1948 if (!GoodFloat(f)) /* catch 1.0NaN, which is 1.0Inf */ 1949 goto return_f; 1950 } 1951 1952return_err: 1953 result->tag.kernel = TEND; 1954 if (nst) 1955 result->val.nint = aux - StreamLexAux(nst); 1956 if (nst) StreamPtr(nst) = t; 1957 return (char *) t; 1958} 1959 1960/* CAUTION: Bip_Error() is redefined to Bip_Error_Fail() */ 1961