1/* BEGIN LICENSE BLOCK 2 * Version: CMPL 1.1 3 * 4 * The contents of this file are subject to the Cisco-style Mozilla Public 5 * License Version 1.1 (the "License"); you may not use this file except 6 * in compliance with the License. You may obtain a copy of the License 7 * at www.eclipse-clp.org/license. 8 * 9 * Software distributed under the License is distributed on an "AS IS" 10 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 11 * the License for the specific language governing rights and limitations 12 * under the License. 13 * 14 * The Original Code is The ECLiPSe Constraint Logic Programming System. 15 * The Initial Developer of the Original Code is Cisco Systems, Inc. 16 * Portions created by the Initial Developer are 17 * Copyright (C) 1989-2006 Cisco Systems, Inc. All Rights Reserved. 18 * 19 * Contributor(s): 20 * 21 * END LICENSE BLOCK */ 22 23/*---------------------------------------------------------------------- 24 * System: ECLiPSe Constraint Logic Programming System 25 * Version: $Id: read.c,v 1.11 2013/03/08 13:47:19 jschimpf Exp $ 26 * 27 * Content: ECLiPSe parser 28 * Author: Joachim Schimpf, IC-Parc 29 * Micha Meier, ECRC (some macro transformation code) 30 * 31 * History: 32 * This is a complete rewrite of the original Sepia parser (written 33 * by Dominique Henry de Villeneuve) and retains very little of the 34 * original code. The new code is structurally based on Richard O'Keefe's 35 * public domain Prolog parser (read.pl), however, it is completely 36 * deterministic and therefore has a few restrictions wrt the resolution 37 * of ambiguities. 38 * 39 * TODO: 40 * - reduce the overhead of operator lookups, and avoid multiple lookups 41 * - optionally parse {a,b} as {}(a,b) as in Mercury 42 * - parse (Term)(Args) as apply(Term, Args), similar to Mercury's 43 * (X^T)(A,B,C) as ''((X^T),A,B,C) 44 * 45 * Syntax extensions: 46 * 47 * Parse: as: syntax_option: 48 * 49 * X[Args] subscript(X, [Args]) no_array_subscripts 50 * f(a)[Args] subscript(X, [Args]) no_array_subscripts 51 * a[Args] subscript(a, [Args]) atom_subscripts 52 * (...)[Args] subscript(..., [Args]) general_subscripts 53 * [Xs][Args] subscript([Xs], [Args]) general_subscripts 54 * Subscript[Args] subscript(Subscript, [Args]) general_subscripts 55 * 56 * X{Args} X 'with attributes' [Args] no_attributes 57 * a{Args} a with [Args] no_curly_arguments 58 * a{} a with [] no_curly_arguments 59 60 * {Args} {}(Args) not curly_args_as_list 61 * {Args} {}([Args]) curly_args_as_list 62 * 63 * X(Args) apply(X, [Args]) var_functor_is_apply 64 * 65 * f(a){Args} unused 66 * f(a)(Args) unused 67 * 123[Args] unused 68 * 123{Args} unused 69 * 123(Args) unused 70 * 71 * (the atom-bracket sequences are recognised only where the 72 * atom is not a prefix/infix). 73 * 74 * Call hierarchy of the parser: 75 * 76 * ec_read_term 77 * _read_next_term % top term or right arg of infix/prefix 78 * _read_next_term 79 * _read_list % reads a normal list in [] syntax 80 * _read_next_term 81 * _read_struct % reads structure in canonical syntax 82 * _read_next_term 83 * _read_sequence_until % reads bracketed comma-sequence 84 * _read_next_term 85 * _read_after_term % infix/postfix/subscript/delimiter 86 * _read_next_term 87 * _read_after_term 88 * _read_list % reads a [] subscript list 89 * 90 * 91 * 92 * 93 * Annotated terms 94 * --------------- 95 * When invoked with the LAYOUT_PLEASE option, the parser returns an 96 * annotated term instead of the plain parsed term. In an annotated 97 * term, every subterm is wrapped into a annotated_term/4 structure: 98 * 99 * :- export struct(annotated_term( 100 * term, % var,atomic,compound 101 * type, % term type (see below) 102 * from, to % source position (integer) 103 * )). 104 * 105 * The type field describes the type of the parsed term and is one of 106 * the following: 107 * 108 * integer 109 * float 110 * rational 111 * breal 112 * atom 113 * string term is a string or a char_code list 114 * anonymous term is an anonymous variable 115 * var(NameAtom) term is a variable with the given name 116 * compound term is compound (with annotated subterms) 117 * 118 * In the case of atomic terms and variables, the term field simply 119 * contains the plain parsed term. For compound terms, the term field 120 * contains a structure whose functor is the functor of the plain term, 121 * but whose arguments are annotated versions of the plain term arguments. 122 * E.g. the source term 123 * 3 124 * is parsed as 125 * annotated_term(3, integer, ...) 126 * 127 * The source term 128 * foo(bar, X, _, 3) 129 * is parsed as 130 * annotated_term(foo( 131 * annotated_term(bar, 132 * atom, ...), 133 * annotated_term(_, 134 * anonymous, ...), 135 * annotated_term(3, 136 * integer, ...)), 137 * compound, ...) 138 * The source term 139 * [1,2] 140 * is parsed as 141 * annotated_term(.( 142 * annotated_term(1, 143 * integer, ...), 144 * annotated_term(.( 145 * annotated_term(2, 146 * integer, ...), 147 * annotated_term([], 148 * atom, ...)), 149 * compound, ...)), 150 * compound, ...) 151 * 152 * 153 * The from/to fields of an annotated term describe a "source position" 154 * of the term. Every term/subterm is represented by one (sometimes two 155 * consecutive) tokens in the source, defined as follows: 156 * 157 * - atoms, strings and unsigned numbers are represented by their 158 * corresponding IDENTIFIER, NUMBER or STRING token. 159 * - signed numbers are represented by two consecutive tokens (sign+number) 160 * - compound terms in canonical notation are represented by two consecutive 161 * tokens (functor and opening parenthesis) 162 * - compound terms in operator syntax are represented by the operator's 163 * IDENTIFIER token 164 * - lists: a proper list [a,b] has subterms 165 * [a,b] represented by the [ token, 166 * [b] represented by the , token, 167 * [] represented by the ] token, 168 * a represented by itself, 169 * b represented by itself. 170 * a general list [a,b|T] has subterms 171 * [a,b|T] represented by the [ token, 172 * [b|T] represented by the , token, 173 * T represented by itself, 174 * a represented by itself, 175 * b represented by itself. 176 * Note that the | and ] tokens do not represent any term. 177 * - special notations: 178 * X[Args] 179 * subscript(X, [...]) represented by the [ token, 180 * X,Args represented by itself, 181 * X{Args} 182 * 'with attributes'(X,[Args]) represented by { token, 183 * (alternatively: X{ tokens) 184 * X,Args represented by themselves 185 * a{Args} 186 * with(a,[Args]) represented by the { token 187 * (alternatively: a{ tokens) 188 * a,Args represented by themselves 189 * X(Args) 190 * apply(X,[Args]) represented by the ( token 191 * X,Args represented by themselves 192 * In all these cases, the commas represent nothing. 193 * 194 * The source position of a term is the union of the source positions of 195 * the representing tokens. 196 *----------------------------------------------------------------------*/ 197 198 199#include "config.h" 200#include "sepia.h" 201#include "types.h" 202#include "embed.h" 203#include "error.h" 204#include "mem.h" 205#include "dict.h" 206#include "lex.h" 207#include "emu_export.h" 208#include "ec_io.h" 209#include "read.h" 210#include "module.h" 211#include "property.h" 212 213#ifdef HAVE_STRING_H 214#include <string.h> 215#endif 216 217 218/* 219 * EXTERNALS 220 */ 221 222extern pword *p_meta_arity_; 223 224 225/* 226 * TYPES 227 */ 228 229typedef struct s_varword /* variable stack */ 230{ 231 char *str; 232 pword *ptr; 233 int lock; 234 struct s_varword *next; 235} vword; 236 237 238typedef struct parse_desc { 239 240 /* in: */ 241 stream_id nst; /* stream we are reading from */ 242 syntax_desc *sd; /* module syntax descriptor */ 243 dident module; /* caller module (for op,macro) */ 244 type module_tag; /* caller module tag */ 245 int options; /* parser options */ 246 int max_arg_prec; /* maximium argument precedence */ 247 248 /* internal: */ 249 token_desc token, /* current token */ 250 prev_token, /* previous token */ 251 next_token; /* next token */ 252 253 vword *var_table; /* hash table for variable names */ 254 int var_table_size; /* the table's size */ 255 word counter; /* its generation counter */ 256 257 int macro; /* term may contain a macro */ 258 pword *var_list_tail; /* tail of varlist (readvar) */ 259 260 temp_area string_store; /* temp store for strings */ 261} parse_desc; 262 263 264/* 265 * STATIC VARIABLES 266 */ 267 268static dident d_comma0_; 269static dident d_bar0_; 270static dident d_annotated_term_; 271static dident d_anonymous_; 272 273 274/* 275 * FUNCTIONS 276 */ 277 278static parse_desc 279 *_alloc_parse_env(int caller, stream_id nst, dident module, type mod_tag); 280 281static vword * 282 _var_table_entry(parse_desc *pd, char *varname, word lenght); 283 284int 285 do_trafo(pword *), 286 p_read3(value vs, type ts, value v, type t, value vm, type tm); 287 288static int 289 _pread3(value v, type t, stream_id nst, value vm, type tm), 290 p_read2(value v, type t, value vm, type tm), 291 p_read_annotated_raw(value vs, type ts, value v, type t, value vf, type tf, value vm, type tm), 292 p_readvar(value vs, type ts, value v, type t, value vv, type tv, value vm, type tm); 293 294static uword 295 hashfunction(char *id); 296 297static vword 298 *_alloc_vword(register parse_desc *pd); 299 300static int 301 _transf_attribute(register pword *pw, pword *r, int def), 302 _read_next_term(parse_desc *pd, int context_prec, int context_flags, 303 pword *result), 304 _read_after_term(parse_desc *pd, int context_prec, int context_flags, 305 int term_prec, pword *result); 306 307 308/* 309 * CONSTANTS OF THE PARSER 310*/ 311 312/* size of the variable hash table (should be a prime) */ 313#define NUMBER_VAR 1009 314 315 316 317/* 318 * Values for context_flags 319 * The *_TERMINATES flags mean that COMMA/BAR terminate a term 320 * unconditionally, i.e. overriding the normal precedence rules 321 * (this is used when a subterm is a list or structure argument). 322 * The SUBSCRIPTABLE flag means the term may be followed by a subscript. 323 */ 324 325#define COMMA_TERMINATES 0x01 /* list elements or structure fields */ 326#define BAR_TERMINATES 0x02 /* list elements only */ 327#define SUBSCRIPTABLE 0x04 /* term can be followed by subscript */ 328#define PREBINFIRST 0x08 /* first argument of prefix binary op */ 329#define FZINC_SUBSCRIPTABLE 0x10 /* subscripts after atoms */ 330#define ZINC_SUBSCRIPTABLE 0x20 /* subscripts after almost everything */ 331#define ATTRIBUTABLE 0x40 /* term can be followed by attributes */ 332#define ARGOFOP 0x80 /* argument of an operator */ 333 334 335/* 336 * Interface with the lexer 337 * 338 * The current token is always cached in the parse_desc. 339 * Normally, we advance to the next token by calling Next_Token(). 340 * When lookahead is needed, we use Lookahead_Next_Token() instead, 341 * and later go back by calling Prev_Token(). 342 */ 343 344#define Next_Token(pd) \ 345 if (pd->next_token.class == NO_TOKEN) { \ 346 (void) lex_an(pd->nst, pd->sd, &pd->token); \ 347 } else { \ 348 pd->token = pd->next_token; \ 349 pd->next_token.class = NO_TOKEN; \ 350 } 351 352#define Lookahead_Next_Token(pd) \ 353 pd->prev_token = pd->token; \ 354 if (pd->token.string == (char*) StreamLexAux(pd->nst)) { \ 355 pd->prev_token.string = TempAlloc(pd->string_store, pd->token.term.val.nint + 1); \ 356 Copy_Bytes(pd->prev_token.string, pd->token.string, pd->token.term.val.nint + 1); \ 357 } \ 358 Next_Token(pd) 359 360#define Prev_Token(pd) \ 361 pd->next_token = pd->token; \ 362 if (pd->token.string == (char*) StreamLexAux(pd->nst)) { \ 363 pd->next_token.string = TempAlloc(pd->string_store, pd->token.term.val.nint + 1); \ 364 Copy_Bytes(pd->next_token.string, pd->token.string, pd->token.term.val.nint + 1); \ 365 } \ 366 pd->token = pd->prev_token; 367 368#define IsClass(pd,cl) ((pd)->token.class == (cl)) 369#define IsChar(pd,char) ((pd)->token.term.val.nint == (char)) 370#define IsToken(pd,cl,char) (IsClass(pd,cl) && IsChar(pd,char)) 371 372#define TokenString(pd) \ 373 (pd->token.string) 374 375#define TokenStringLen(pd) \ 376 (pd->token.term.val.nint) 377 378#define Save_Token_String(pd, s, l) \ 379 l = TokenStringLen(pd); \ 380 if (TokenString(pd) == (char*) StreamLexAux(pd->nst)) { \ 381 s = TempAlloc(pd->string_store, l + 1); \ 382 Copy_Bytes(s, TokenString(pd), l + 1); \ 383 } else { \ 384 s = TokenString(pd); \ 385 } 386 387#define Make_Ident_Token(pd, s, l) \ 388 pd->token.string = (s); \ 389 pd->token.term.val.nint = (l); \ 390 pd->token.class = IDENTIFIER; 391 392/* 393 * Macros (read/clause/goal-macros) 394 * While parsing the term, we check whether we come across any items that 395 * _may_ have a macro transformation defined (no matter which), and set a flag. 396 * If any, we do a macro-expansion pass over the term after it has been parsed. 397 * This is done in Prolog (by calling expand_macros_/3). 398 */ 399 400#define Flag_Type_Macro(pd, type) \ 401 { if (DidMacro(TransfDid(type))) pd->macro = 1; } 402 403#define Flag_Did_Macro(pd, wdid) \ 404 { if (DidMacro(wdid)) pd->macro = 1; } 405 406 407/* 408 * Term construction: 409 * The Build_XXX macros/functions construct the ECLiPSe terms, wrapped 410 * into a term descriptor, if requested. 411 */ 412 413#define TERM_TERM 1 414#define TERM_TYPE 2 415#define TERM_FILE 3 416#define TERM_LINE 4 417#define TERM_FROM 5 418#define TERM_TO 6 419#define TERM_ARITY 6 420 421 422static source_pos_t no_pos_ = {D_UNKNOWN,0,0,0}; 423 424#define Merge_Source_Pos(p1,p2,paux) \ 425 paux.file = p1.file; \ 426 paux.line = p1.line; \ 427 paux.from = p1.from; \ 428 paux.to = p2.to; 429 430 431/* 432 * Term construction 433 */ 434 435/* 436 * Construct the annotated_term/4 descriptive wrapper, if requested 437 * annotated_term(Term, <dtype>, <pos.from>, <pos.to>) 438 */ 439 440#define Make_Term_Wrapper(pw, _pw, dtype, pos) \ 441 if (!(pd->options & LAYOUT_PLEASE)) { \ 442 _pw = (pw); \ 443 } else { \ 444 _pw = TG; \ 445 Make_Struct(pw, TG); \ 446 Push_Struct_Frame(d_annotated_term_); \ 447 Make_Atom(_pw+TERM_TYPE, dtype); \ 448 Make_Atom(_pw+TERM_FILE, (pos).file); \ 449 Make_Integer(_pw+TERM_LINE, (pos).line); \ 450 Make_Integer(_pw+TERM_FROM, (pos).from); \ 451 Make_Integer(_pw+TERM_TO, (pos).to); \ 452 _pw += TERM_TERM; \ 453 } 454 455/* 456 * Construct a annotated_term/4 descriptive wrapper for a variable, if requested 457 * annotated_term(X, var('X'), <pos.from>, <pos.to>) 458 */ 459#define Make_Var_Wrapper(pw, pvar, pos) \ 460 if (!(pd->options & LAYOUT_PLEASE)) { \ 461 pvar = (pw); \ 462 } else { \ 463 pword *_pw = TG; \ 464 Make_Struct(pw, TG); \ 465 Push_Struct_Frame(d_annotated_term_); \ 466 pvar = _pw+TERM_TERM; \ 467 Make_Struct(_pw+TERM_TYPE, TG); \ 468 Make_Atom(_pw+TERM_FILE, (pos).file); \ 469 Make_Integer(_pw+TERM_LINE, (pos).line); \ 470 Make_Integer(_pw+TERM_FROM, (pos).from); \ 471 Make_Integer(_pw+TERM_TO, (pos).to); \ 472 _pw = TG; \ 473 Push_Struct_Frame(d_.var); \ 474 Make_Atom(_pw+1, enter_dict_n(TokenString(pd), TokenStringLen(pd), 0));\ 475 } 476 477/* 478 * This macro copies the pwords at 'from' to 'to', except that in the case of 479 * self-reference it creates a new self-reference at 'to'. This assumes that 480 * there are no references to the location 'from' that could become dangling! 481 */ 482#define Move_Pword(from, to) \ 483 (to)->tag.all = (from)->tag.all; \ 484 if (IsRef((from)->tag) && (from)->val.ptr == (from)) { \ 485 (to)->val.ptr = (to); \ 486 } else { \ 487 (to)->val.all = (from)->val.all; \ 488 } 489 490#define Build_List(pw,phead,pos) { \ 491 pword *_pw; \ 492 Make_Term_Wrapper(pw, _pw, d_.compound0, pos); \ 493 phead = TG; \ 494 Make_List(_pw, TG); \ 495 Push_List_Frame(); \ 496 } 497 498#define Build_Nil(pw,pos) { \ 499 pword *_pw; \ 500 Make_Term_Wrapper(pw, _pw, d_.atom0, pos); \ 501 Make_Nil(_pw); \ 502 } 503 504#define Build_Integer(pw,n,pos) { \ 505 pword *_pw; \ 506 Make_Term_Wrapper(pw, _pw, d_.integer0, pos); \ 507 Make_Integer(_pw, n); \ 508 } 509 510#define Build_Struct(pw, pfct, d,pos) { \ 511 pword *_pw; \ 512 Flag_Type_Macro(pd, TCOMP); \ 513 Flag_Did_Macro(pd, d); \ 514 Make_Term_Wrapper(pw, _pw, d_.compound0, pos); \ 515 Make_Struct(_pw, TG); \ 516 pfct = TG; \ 517 Push_Struct_Frame(d); \ 518 } 519 520#define Build_Struct_Or_List(pw, pfct, d,pos) { \ 521 pword *_pw; \ 522 Flag_Type_Macro(pd, TCOMP); \ 523 Flag_Did_Macro(pd, d); \ 524 Make_Term_Wrapper(pw, _pw, d_.compound0, pos); \ 525 if ((d) == d_.list) { \ 526 Make_List(_pw, TG); \ 527 pfct = TG-1; \ 528 Push_List_Frame(); \ 529 } else { \ 530 Make_Struct(_pw, TG); \ 531 pfct = TG; \ 532 Push_Struct_Frame(d); \ 533 } \ 534 } 535 536#define Build_Atom_Or_Nil(pw, d,pos) { \ 537 pword *_pw; \ 538 Flag_Type_Macro(pd, TDICT); \ 539 Flag_Did_Macro(pd, d); \ 540 Make_Term_Wrapper(pw, _pw, d_.atom0, pos); \ 541 _pw->val.did = d; \ 542 _pw->tag.kernel = ((d) == d_.nil) ? TNIL : TDICT;\ 543 } 544 545#define Build_Number_From_Token(pd, pw) { \ 546 pword *_pw; \ 547 Make_Term_Wrapper(pw, _pw, \ 548 tag_desc[tag_desc[TagType(pd->token.term.tag)].super].type_name,\ 549 pd->token.pos);\ 550 Flag_Type_Macro(pd, TagType(pd->token.term.tag)); \ 551 if (IsInterval(pd->token.term.tag)) { \ 552 Unmark_Interval_Raw(pd->token.term.val.ptr); \ 553 if (!GoodInterval(pd->token.term.val.ptr)) \ 554 return BAD_NUMERIC_CONSTANT; \ 555 } \ 556 *_pw = pd->token.term; \ 557 } 558 559#define Build_String_From_Token(pd, pw) { \ 560 word len1 = TokenStringLen(pd) + 1; \ 561 pword *_pw; \ 562 Flag_Type_Macro(pd, TSTRG); \ 563 Make_Term_Wrapper(pw, _pw, d_.string0, pd->token.pos); \ 564 _pw->val.ptr = TG; \ 565 _pw->tag.kernel = TSTRG; \ 566 Push_Buffer(len1); \ 567 Copy_Bytes(StringStart(_pw->val), TokenString(pd), len1);\ 568 } 569 570 571static void 572_build_list_from_token(parse_desc *pd, pword *pw) 573{ 574 int i; 575 Flag_Type_Macro(pd, TINT); 576 Flag_Type_Macro(pd, TDICT); 577 Flag_Did_Macro(pd, d_.nil); 578 Flag_Type_Macro(pd, TCOMP); 579 Flag_Did_Macro(pd, d_.list); 580 if (pd->token.class == CODES) { 581 for(i=0; i<TokenStringLen(pd); ++i) 582 { 583 pword *phead; 584 Build_List(pw, phead, pd->token.pos); 585 Build_Integer(phead, TokenString(pd)[i], pd->token.pos); 586 pw = phead+1; 587 } 588 } else /*if (pd->token.class == CHARS)*/ { 589 for(i=0; i<TokenStringLen(pd); ++i) 590 { 591 pword *phead; 592 dident char_did = enter_dict_n(TokenString(pd)+i, 1, 0); 593 Build_List(pw, phead, pd->token.pos); 594 Build_Atom_Or_Nil(phead, char_did, pd->token.pos); 595 pw = phead+1; 596 } 597 } 598 Build_Nil(pw, pd->token.pos); 599} 600 601 602static pword * 603_make_variable_from_token(parse_desc *pd, pword *pvar) 604{ 605 dident did0 = D_UNKNOWN; 606 /* 607 * Non-anonymous variables are always allocated separately and referenced 608 * from all their occurrences. The self-reference is never moved, and 609 * the references can simply be copied. This is somewhat suboptimal since 610 * one occurrence could have the self-reference in-place, but then that 611 * occurrence could not be moved easily because it would be referred by 612 * other occurrences and by the name table (see usage of Move_Pword()). 613 */ 614 Make_Ref(pvar, TG); 615 pvar = TG++; 616 Check_Gc; 617 if (pd->options & VARNAMES_PLEASE) { 618 did0 = enter_dict_n(TokenString(pd), TokenStringLen(pd), 0); 619 Make_NamedVar(pvar, did0); 620 } else { 621 Make_Var(pvar); 622 } 623 if (pd->var_list_tail) /* need list element for readvar */ 624 { 625 pword *pw = TG; 626 Make_List(pd->var_list_tail, TG); 627 Push_List_Frame(); /* list element */ 628 Push_List_Frame(); /* ['Name'|Var] pair */ 629 Make_List(&pw[0], &pw[2]); 630 pd->var_list_tail = &pw[1]; 631 if (did0 == D_UNKNOWN) 632 did0 = enter_dict_n(TokenString(pd), TokenStringLen(pd), 0); 633 Make_Atom(&pw[2], did0); 634 Make_Ref(&pw[3], pvar); 635 } 636 return pvar; 637} 638 639 640/* 641 * Lookahead function: _delimiter_follows() 642 * 643 * Succeeds if a delimiter follows, i.e. something that can definitely 644 * not start a term. This is used to disambiguate between infix and postfix: 645 * when a delimiter follows, only the postfix interpretation is possible. 646 */ 647 648static int 649_delimiter_follows(parse_desc *pd) 650{ 651 int res; 652 Lookahead_Next_Token(pd); 653 switch(pd->token.class) 654 { 655 case EOI: 656 case EOCL: 657 case COMMA: 658 case BAR: 659 case CLOSING_SOLO: 660 res = 1; 661 break; 662 663 default: 664 res = 0; 665 break; 666 } 667 Prev_Token(pd); 668 return res; 669} 670 671 672/* 673 * Lookahead function: _cant_follow_prefix() 674 * 675 * This is one of the more tricky bits of the whole parser. The 676 * function returns true if the current token cannot possibly follow a 677 * prefix operator with precedence oprec/rprec (in that case, we may 678 * still be able to parse the term by ignoring the prefix-property and 679 * interpreting the potential prefix as a plain atom). If the current 680 * token happens to be an identifier, we look ahead a second token to 681 * get better ambiguity resolution. 682 */ 683 684/* 685 * Conservative check for tokens that can start but not follow a term 686 */ 687#define CantFollowTerm(class) (\ 688 (class)==NUMBER || (class)==SPACE_NUMBER || (class)==REFERENCE \ 689 || (class)==UREFERENCE || (class)==STRING || (class)==CODES \ 690 || (class)==CHARS || (class)==SPACE_SOLO) 691 692#define IsDelimiter(class) (\ 693 (class)==EOI || (class)==EOCL || (class)==COMMA || (class)==BAR \ 694 || (class)==CLOSING_SOLO)\ 695 696static int 697_cant_follow_prefix(parse_desc *pd, int context_flags, 698 int oprec, int rprec, int prefix_arity) 699{ 700 opi *pre_op, *follow_op; 701 dident did0; 702 int status, class; 703 704 class = pd->token.class; 705 switch(class) 706 { 707 case EOCL: 708 case EOI: 709 case CLOSING_SOLO: 710 return 1; 711 712 case COMMA: 713 if (context_flags & COMMA_TERMINATES) 714 return 1; 715 did0 = d_comma0_; 716 goto _check_precedence_; 717 718 case BAR: 719 if ((context_flags & BAR_TERMINATES) || (pd->sd->options & BAR_IS_NO_ATOM)) 720 return 1; 721 did0 = d_bar0_; 722 goto _check_precedence_; 723 724 case IDENTIFIER: 725 case QIDENTIFIER: 726 /* 727 * An atom which is not an operator CAN follow the prefix 728 */ 729 did0 = check_did_n(TokenString(pd), TokenStringLen(pd), 0); 730 if (did0 == D_UNKNOWN || !DidIsOp(did0)) 731 return 0; 732 733_check_precedence_: /* (did0,class) */ 734 735 if (pd->sd->options & ISO_RESTRICTIONS) 736 return 0; 737 738 /* 739 * A functor-term CAN follow the prefix 740 */ 741 Lookahead_Next_Token(pd); /* Prev_Token(pd) must follow! */ 742 if (IsClass(pd, SOLO) && (IsChar(pd, '(') || IsChar(pd, '{'))) 743 goto _return_0_; 744 745 /* 746 * A signed number CAN follow the prefix 747 */ 748 if (class==IDENTIFIER && (did0 == d_.minus0 || did0 == d_.plus0) && IsClass(pd,NUMBER)) 749 goto _return_0_; 750 751 /* 752 * prefix with lower priority CAN follow first prefix 753 */ 754 pre_op = visible_prefix_op(did0, pd->module, pd->module_tag, &status); 755 if (pre_op && (GetOpiPreced(pre_op) <= rprec)) 756 goto _return_0_; 757 758 if (prefix_arity == 1) 759 { 760 /* 761 * An infix/postfix with higher precedence CAN'T follow the prefix, 762 * i.e. forces the prefix to be interpreted as an atom. 763 * Moreover, if we have a sequence 764 * prefix infix NEXT 765 * and NEXT is a token that can't follow a complete term, this 766 * forces the prefix to be interpreted as an atom (otherwise it 767 * would be a syntax error anyway), eg: local / 2 768 * 769 * Examples: 770 * fy9 yfx10 3 -> (fy9) yfx10 3 771 * fy9 yfx10 foo -> (fy9) yfx10 foo 772 * fy9 yfx9 3 -> (fy9) yfx9 3 773 * fy9 yfx9 foo -> (fy9) yfx9 foo i.e. prefer infix 774 * fy10 yfx9 3 -> (fy10) yfx9 3 775 * fy10 yfx9 foo -> fy10 (yfx9) foo 776 * fy9 yf10 -> (fy9) yf10 777 * fy9 yf9 -> fy9 (yf9) i.e. prefer prefix 778 * fy10 yf9 -> fy10 (yf9) 779 */ 780 if (((follow_op = visible_infix_op(did0, pd->module, pd->module_tag, &status)) 781 && (oprec <= InfixLeftPrecedence(follow_op) 782 || CantFollowTerm(pd->token.class))) 783 || ((follow_op = visible_postfix_op(did0, pd->module, pd->module_tag, &status)) 784 && oprec < PostfixLeftPrecedence(follow_op)) 785 ) 786 { 787 Prev_Token(pd); 788 return 1; 789 } 790 } 791 else /* if (prefix_arity == 2) */ 792 { 793 /* 794 * A sequence 795 * prefix2 infix NEXT 796 * is ambiguous (independent of NEXT, either prefix2 of infix can 797 * be the functor). We could disambiguate using the precedence, 798 * but my feeling is that in practice one always wants the infix 799 * in such a case, e.g. 800 * some / 2 801 * In a sequence 802 * prefix2 postfix delimiter 803 * there is no choice but to prefer the postfix, and for a general 804 * prefix2 postfix NEXT 805 * we prefer the postfix only if it binds weaker than the prefix 806 * (analogous to prefix/infix and prefix/postfix disambiguation). 807 */ 808 if (((follow_op = visible_infix_op(did0, pd->module, pd->module_tag, &status)) 809 /* && (oprec <= InfixLeftPrecedence(follow_op)) */ ) 810 || ((follow_op = visible_postfix_op(did0, pd->module, pd->module_tag, &status)) 811 && (oprec < PostfixLeftPrecedence(follow_op) 812 || IsDelimiter(pd->token.class))) 813 ) 814 { 815 Prev_Token(pd); 816 return 1; 817 } 818 } 819 820_return_0_: 821 Prev_Token(pd); 822 return 0; 823 824 825 default: 826 return 0; 827 } 828} 829 830 831/* 832 * Parse a standard list, i.e. everything following [ up until ] 833 */ 834 835static int 836_read_list(parse_desc *pd, pword *result, source_pos_t *ppos) 837{ 838 source_pos_t pos = *ppos; 839 840 for(;;) 841 { 842 int status; 843 pword *pw = TG; 844 845 Build_List(result, pw, pos); 846 status = _read_next_term(pd, pd->max_arg_prec, COMMA_TERMINATES|BAR_TERMINATES, pw); 847 Return_If_Error(status); 848 result = &pw[1]; 849 850 switch(pd->token.class) 851 { 852 case BAR: 853 Next_Token(pd); 854 status = _read_next_term(pd, pd->max_arg_prec, COMMA_TERMINATES|BAR_TERMINATES, result); 855 Return_If_Error(status); 856 if (!IsToken(pd, CLOSING_SOLO, ']')) 857 return PUNCTUATION; 858 Next_Token(pd); 859 Flag_Type_Macro(pd, TCOMP); 860 Flag_Did_Macro(pd, d_.list); 861 return PSUCCEED; 862 863 case COMMA: 864 pos = pd->token.pos; 865 Next_Token(pd); 866 break; 867 868 case CLOSING_SOLO: 869 if (IsChar(pd, ']')) 870 { 871 Build_Nil(result, pd->token.pos); 872 Next_Token(pd); 873 Flag_Type_Macro(pd, TDICT); 874 Flag_Did_Macro(pd, d_.nil); 875 Flag_Type_Macro(pd, TCOMP); 876 Flag_Did_Macro(pd, d_.list); 877 return PSUCCEED; 878 } 879 return UNCLOSBR; 880 881 case EOI: 882 return ENDOFFILE; 883 884 case EOCL: 885 return ENDOFCLAUSE; 886 887 default: 888 return LexError(pd->token.class) ? pd->token.class : UNEXPECTED; 889 } 890 } 891} 892 893 894/* 895 * Parse a comma-separated sequence up until the solo-char terminator 896 * (currently either a closing round or curly bracket) and return it as a list 897 */ 898 899static int 900_read_sequence_until(parse_desc *pd, pword *result, int terminator) 901{ 902 for(;;) 903 { 904 int status; 905 pword *pw = TG; 906 907 Build_List(result, pw, no_pos_); 908 status = _read_next_term(pd, pd->max_arg_prec, COMMA_TERMINATES, pw); 909 Return_If_Error(status); 910 result = &pw[1]; 911 912 switch(pd->token.class) 913 { 914 case COMMA: 915 Next_Token(pd); 916 break; 917 918 case CLOSING_SOLO: 919 if (IsChar(pd, terminator)) 920 { 921 Build_Nil(result, no_pos_); 922 Next_Token(pd); 923 return PSUCCEED; 924 } 925 return UNCLOSBR; 926 927 case EOI: 928 return ENDOFFILE; 929 930 case EOCL: 931 return ENDOFCLAUSE; 932 933 default: 934 return LexError(pd->token.class) ? pd->token.class : UNEXPECTED; 935 } 936 } 937} 938 939 940/* 941 * Parse a standard structure, i.e. everything after f( up until ) 942 */ 943 944static int 945_read_struct(parse_desc *pd, char *name, uword length, pword *result, 946 source_pos_t *ppos) 947{ 948 int status; 949 dident functor; 950 pword all_args; 951 pword *tail = &all_args; 952 pword *pw; 953 uword i; 954 955 for(i=1; ; ++i) 956 { 957 pw = TG; 958 Make_List(tail, pw); 959 Push_List_Frame(); 960 tail = pw+1; 961 962 status = _read_next_term(pd, pd->max_arg_prec, COMMA_TERMINATES, pw); 963 Return_If_Error(status); 964 965 switch(pd->token.class) 966 { 967 case COMMA: 968 Next_Token(pd); 969 continue; 970 971 case CLOSING_SOLO: 972 if (IsChar(pd, ')')) 973 { 974 Make_Nil(tail); 975 Next_Token(pd); 976 break; 977 } 978 return UNCLOSBR; 979 980 case EOI: 981 return ENDOFFILE; 982 983 case EOCL: 984 return ENDOFCLAUSE; 985 986 default: 987 return LexError(pd->token.class) ? pd->token.class : UNEXPECTED; 988 } 989 break; 990 } 991 992 /* create the structure from the argument list (the list becomes garbage) */ 993 pw = TG; 994 functor = enter_dict_n(name, length, i); 995 Build_Struct_Or_List(result, pw, functor, *ppos); 996 tail = all_args.val.ptr; 997 do 998 { 999 /* We use Move_Pword() instead of a simple copy - this will move 1000 * anonymous variables instead of creating a reference to them. 1001 */ 1002 ++pw; 1003 Move_Pword(tail, pw); 1004 tail = tail[1].val.ptr; 1005 } 1006 while(--i); 1007 1008 return PSUCCEED; 1009} 1010 1011 1012/* 1013 * Parse a toplevel term, a subterm, or a right argument of prefix/infix 1014 */ 1015 1016static int 1017_read_next_term(parse_desc *pd, 1018 int context_prec, 1019 int context_flags, /* terminators, ARGOFOP */ 1020 pword *result) 1021{ 1022 int status, class; 1023 pword term; 1024 char *name; 1025 uword length; 1026 dident did0; 1027 opi *pre_op; 1028 source_pos_t pos; 1029 1030 pos = pd->token.pos; 1031 class = pd->token.class; 1032 switch(class) 1033 { 1034 1035 case BAR: 1036 if (pd->sd->options & BAR_IS_NO_ATOM) 1037 return UNEXPECTED; 1038 /* fall through and treat like normal identifier */ 1039 1040 case IDENTIFIER: 1041 case QIDENTIFIER: 1042 Save_Token_String(pd, name, length); /* don't make dident eagerly */ 1043_treat_like_identifier_: /* (name,length,class) */ 1044 Next_Token(pd); 1045 switch(pd->token.class) 1046 { 1047 1048 case SOLO: 1049 if (IsChar(pd, '(')) 1050 { 1051_treat_as_functor_: 1052 /* a compound term in canonical functor notation */ 1053 Merge_Source_Pos(pos, pd->token.pos, pos); 1054 Next_Token(pd); 1055 status = _read_struct(pd, name, length, &term, &pos); 1056 Return_If_Error(status); 1057 *result = term; 1058 return _read_after_term(pd, context_prec, context_flags|SUBSCRIPTABLE, 0, result); 1059 } 1060 else if (IsChar(pd, '{') && !(pd->sd->options & NO_CURLY_ARGUMENTS)) 1061 { 1062 /* a structure with arguments in curly braces */ 1063 pword *pw; 1064 dident did0 = enter_dict_n(name, length, 0); 1065 Build_Struct(&term, pw, d_.with2, pd->token.pos); 1066 Build_Atom_Or_Nil(&pw[1], did0, pos); 1067 Next_Token(pd); 1068 if (IsToken(pd, CLOSING_SOLO, '}')) 1069 { 1070 Build_Nil(&pw[2], no_pos_); 1071 Next_Token(pd); 1072 } 1073 else 1074 { 1075 status = _read_sequence_until(pd, &pw[2], '}'); 1076 Return_If_Error(status); 1077 } 1078 *result = term; 1079 return _read_after_term(pd, context_prec, context_flags, 0, result); 1080 } 1081 break; 1082 1083 case SPACE_NUMBER: 1084 if (!(pd->sd->options & BLANK_AFTER_SIGN)) 1085 break; 1086 /* fall through */ 1087 1088 case NUMBER: 1089 /* Here, class is IDENTIFIER or QIDENTIFIER */ 1090 /* ECLiPSe: unquoted plus or minus are signs */ 1091 /* ISO: only minus is a sign, but quoted sign is allowed */ 1092 if (length==1 && 1093 (class==IDENTIFIER || pd->sd->options & BLANK_AFTER_SIGN)) 1094 { 1095 if (*name=='-') 1096 { 1097 /* - followed by number: treat as a sign */ 1098 tag_desc[pd->token.term.tag.kernel].arith_op[ARITH_CHGSIGN] 1099 (pd->token.term.val, &pd->token.term); 1100 Merge_Source_Pos(pos, pd->token.pos, pos); 1101 goto _make_number_; 1102 } 1103 else if (*name=='+' && !(pd->sd->options & PLUS_IS_NO_SIGN)) 1104 { 1105 /* + followed by number: treat as a sign */ 1106 Merge_Source_Pos(pos, pd->token.pos, pos); 1107 goto _make_number_; 1108 } 1109 } 1110 break; 1111 } 1112 1113 /* none of the special cases above - check if prefix */ 1114 did0 = enter_dict_n(name, length, 0); 1115 pre_op = visible_prefix_op(did0, pd->module, pd->module_tag, &status); 1116 if (pre_op) 1117 { 1118 if (!IsPrefix2(pre_op)) /* unary prefix */ 1119 { 1120 int oprec, rprec; 1121 Get_Prefix_Prec(pre_op, oprec, rprec); 1122 if (oprec <= context_prec 1123 && !_cant_follow_prefix(pd, context_flags, oprec, rprec, 1)) 1124 { 1125 /* treat as prefix operator */ 1126 pword *pw; 1127 Build_Struct(&term, pw, OpiDid(pre_op), pos); 1128 status = _read_next_term(pd, rprec, context_flags|ARGOFOP, &pw[1]); 1129 Return_If_Error(status); 1130 *result = term; 1131 return _read_after_term(pd, context_prec, context_flags, oprec, result); 1132 } 1133 } 1134 else /* binary prefix */ 1135 { 1136 int oprec, lprec, rprec; 1137 Get_Prefix2_Prec(pre_op, oprec, lprec, rprec); 1138 if (oprec <= context_prec 1139 && !_cant_follow_prefix(pd, context_flags, oprec, lprec, 2)) 1140 { 1141 /* treat as binary prefix operator */ 1142 pword *pw; 1143 Build_Struct_Or_List(&term, pw, OpiDid(pre_op), pos); 1144 status = _read_next_term(pd, lprec, context_flags|ARGOFOP|PREBINFIRST, &pw[1]); 1145 Return_If_Error(status); 1146 status = _read_next_term(pd, rprec, context_flags|ARGOFOP, &pw[2]); 1147 Return_If_Error(status); 1148 *result = term; 1149 return _read_after_term(pd, context_prec, context_flags, oprec, result); 1150 } 1151 } 1152 } 1153 1154 if (IsToken(pd, SPACE_SOLO, '(')) 1155 { 1156 /* compatibility: allow space between functor and ( */ 1157 if (pd->sd->options & NO_BLANKS) 1158 return BLANK; 1159 goto _treat_as_functor_; 1160 } 1161 1162 /* ISO does not allow operators as arguments of operators */ 1163 if (context_flags & ARGOFOP && pd->sd->options & ISO_RESTRICTIONS 1164 && DidIsOp(did0) && visible_operator(did0, pd->module, pd->module_tag)) 1165 return BRACKET; 1166 /* treat as a simple atom */ 1167 Build_Atom_Or_Nil(&term, did0, pos); 1168 *result = term; 1169 return _read_after_term(pd, context_prec, context_flags|FZINC_SUBSCRIPTABLE|ZINC_SUBSCRIPTABLE, 0, result); 1170 1171 1172 case NUMBER: 1173 case SPACE_NUMBER: 1174_make_number_: 1175 Build_Number_From_Token(pd, &term); 1176 Next_Token(pd); 1177 *result = term; 1178 return _read_after_term(pd, context_prec, context_flags, 0, result); 1179 1180 1181 case STRING: /* string-quoted string */ 1182 Build_String_From_Token(pd, &term); 1183 Next_Token(pd); 1184 *result = term; 1185 return _read_after_term(pd, context_prec, context_flags, 0, result); 1186 1187 1188 case CODES: /* codes-list-quoted string */ 1189 case CHARS: /* chars-list-quoted string */ 1190 _build_list_from_token(pd, &term); 1191 Next_Token(pd); 1192 *result = term; 1193 return _read_after_term(pd, context_prec, context_flags|ZINC_SUBSCRIPTABLE, 0, result); 1194 1195 1196 case REFERENCE: /* general variable */ 1197 { 1198 vword *vp = _var_table_entry(pd, TokenString(pd), TokenStringLen(pd)); 1199 pword *pvar; 1200 Make_Var_Wrapper(result, pvar, pd->token.pos); 1201 if (!vp->ptr) 1202 vp->ptr = _make_variable_from_token(pd, pvar); 1203 else 1204 { 1205 Make_Ref(pvar, vp->ptr); 1206 } 1207 goto _after_variable_; 1208 } 1209 1210 1211 case UREFERENCE: /* anonymous variable */ 1212 { 1213 pword *pvar; 1214 /* Anonymous variables are created "in-place" and 1215 * may be moved later by the Move_Pword() macro */ 1216 Make_Term_Wrapper(result, pvar, d_anonymous_, pd->token.pos); 1217 Make_Var(pvar); 1218 } 1219 1220_after_variable_: 1221 Next_Token(pd); 1222 if (IsToken(pd,SOLO,'(') && (pd->sd->options & VAR_FUNCTOR_IS_APPLY)) 1223 { 1224 /* Arguments follow */ 1225 pword *pw; 1226 Build_Struct(&term, pw, d_.apply2, pd->token.pos); 1227 Move_Pword(result, pw+1); 1228 Next_Token(pd); 1229 status = _read_sequence_until(pd, &pw[2], ')'); 1230 Return_If_Error(status); 1231 *result = term; 1232 return _read_after_term(pd, context_prec, context_flags|ZINC_SUBSCRIPTABLE, 0, result); 1233 } 1234 return _read_after_term(pd, context_prec, context_flags|SUBSCRIPTABLE|ATTRIBUTABLE, 0, result); 1235 1236 1237 case SOLO: /* {[( */ 1238 case SPACE_SOLO: /* {( */ 1239 if (IsChar(pd, '[')) 1240 { 1241 Next_Token(pd); 1242 if (IsToken(pd, CLOSING_SOLO, ']')) 1243 { 1244 /* the atom or functor '[]' */ 1245 name = "[]"; length = 2; 1246 Merge_Source_Pos(pos, pd->token.pos, pos); 1247 goto _treat_like_identifier_; /* (name,length,class) */ 1248 } 1249 else 1250 { 1251 /* non-empty list in standard list notation */ 1252 status = _read_list(pd, &term, &pos); 1253 Return_If_Error(status); 1254 } 1255 *result = term; 1256 context_flags |= ZINC_SUBSCRIPTABLE; 1257 } 1258 else if (IsChar(pd, '(')) 1259 { 1260 /* parenthesised subterm */ 1261 Next_Token(pd); 1262 status = _read_next_term(pd, 1200, 0, &term); 1263 Return_If_Error(status); 1264 if (!IsClass(pd, CLOSING_SOLO)) 1265 return UNEXPECTED; 1266 if (!IsChar(pd, ')')) 1267 return UNCLOSBR; 1268 Next_Token(pd); 1269 Move_Pword(&term, result); /* could be a self-ref! */ 1270 context_flags |= ZINC_SUBSCRIPTABLE; 1271 } 1272 else if (IsChar(pd, '{')) 1273 { 1274 Next_Token(pd); 1275 if (IsToken(pd, CLOSING_SOLO, '}')) 1276 { 1277 /* the atom or functor '{}' */ 1278 name = "{}"; length = 2; 1279 Merge_Source_Pos(pos, pd->token.pos, pos); 1280 goto _treat_like_identifier_; /* (name,length,class) */ 1281 } 1282 else 1283 { 1284 /* term in curly brackets: parse as {}/1 structure */ 1285 pword *pw; 1286 Build_Struct(&term, pw, d_.nilcurbr1, pos); 1287 if (pd->sd->options & CURLY_ARGS_AS_LIST) 1288 { 1289 /* {a,b,c} is read as {}([a,b,c]) */ 1290 status = _read_sequence_until(pd, &pw[1], '}'); 1291 } 1292 else 1293 { 1294 /* {a,b,c} is read as {}(','(a,','(b,c))) */ 1295 status = _read_next_term(pd, 1200, 0, &pw[1]); 1296 Return_If_Error(status); 1297 if (!IsClass(pd, CLOSING_SOLO)) 1298 return UNEXPECTED; 1299 if (!IsChar(pd, '}')) 1300 return UNCLOSBR; 1301 Next_Token(pd); 1302 } 1303 } 1304 *result = term; 1305 } 1306 else 1307 { 1308 return UNEXPECTED; 1309 } 1310 return _read_after_term(pd, context_prec, context_flags, 0, result); 1311 1312 1313 case CLOSING_SOLO: /* }]) */ 1314 return UNCLOSBR; 1315 1316 case COMMA: 1317 return UNEXCOMMA; 1318 1319 case EOI: 1320 return ENDOFFILE; 1321 1322 case EOCL: 1323 return ENDOFCLAUSE; 1324 1325 default: 1326 return LexError(pd->token.class) ? pd->token.class : UNEXPECTED; 1327 } 1328} 1329 1330 1331/* 1332 * Parse what can follow a complete term, i.e. 1333 * delimiter, infix, postfix, or possibly subscript. 1334 */ 1335 1336static int 1337_read_after_term(parse_desc *pd, int context_prec, 1338 int context_flags, /* terminators, allowed subscripts */ 1339 int lterm_prec, 1340 pword *result) /* in: lterm, out: result */ 1341{ 1342 int status; 1343 pword term; 1344 dident did0; 1345 opi *in_op; 1346 opi *post_op; 1347 1348 for(;;) /* for removing tail recursion */ 1349 { 1350 switch(pd->token.class) 1351 { 1352 1353 case IDENTIFIER: 1354 case QIDENTIFIER: 1355 did0 = enter_dict_n(TokenString(pd), TokenStringLen(pd), 0); 1356_treat_like_atom_: /* (did0) - caution: may have wrong token in pd */ 1357 in_op = visible_infix_op(did0, pd->module, pd->module_tag, &status); 1358 post_op = visible_postfix_op(did0, pd->module, pd->module_tag, &status); 1359 if (in_op && !(post_op && _delimiter_follows(pd))) 1360 { 1361 /* treat as infix */ 1362 pword *pw; 1363 int lprec, oprec, rprec; 1364 Get_Infix_Prec(in_op, lprec, oprec, rprec); 1365 if (oprec > context_prec) 1366 return PSUCCEED; 1367 if (lterm_prec > lprec) 1368 return context_flags & PREBINFIRST ? PSUCCEED : BRACKET; 1369 /* ISO does not allow operators as arguments of operators */ 1370 if (context_flags & FZINC_SUBSCRIPTABLE && pd->sd->options & ISO_RESTRICTIONS 1371 && DidIsOp(result->val.did) 1372 && visible_operator(result->val.did, pd->module, pd->module_tag)) 1373 return BRACKET; 1374 Build_Struct_Or_List(&term, pw, OpiDid(in_op), pd->token.pos); 1375 /* Use Move_Pword() to move possible self-refs in result 1376 * (because we are going to reuse result!) */ 1377 Move_Pword(result, pw+1); 1378 Next_Token(pd); 1379 context_flags &= ~(SUBSCRIPTABLE|FZINC_SUBSCRIPTABLE|ZINC_SUBSCRIPTABLE); 1380 status = _read_next_term(pd, rprec, context_flags|ARGOFOP, &pw[2]); 1381 Return_If_Error(status); 1382 /*return _read_after_term(pd, context_prec, context_flags, oprec, result);*/ 1383 *result = term; lterm_prec = oprec; 1384 break; /* tail recursion */ 1385 } 1386 else if (post_op) 1387 { 1388 /* treat as postfix */ 1389 pword *pw; 1390 int lprec, oprec; 1391 Get_Postfix_Prec(post_op, lprec, oprec); 1392 if (oprec > context_prec) 1393 return PSUCCEED; 1394 if (lterm_prec > lprec) 1395 return context_flags & PREBINFIRST ? PSUCCEED : BRACKET; 1396 /* ISO does not allow operators as arguments of operators */ 1397 if (context_flags & FZINC_SUBSCRIPTABLE && pd->sd->options & ISO_RESTRICTIONS 1398 && DidIsOp(result->val.did) 1399 && visible_operator(result->val.did, pd->module, pd->module_tag)) 1400 return BRACKET; 1401 Build_Struct(&term, pw, OpiDid(post_op), pd->token.pos); 1402 /* Use Move_Pword() to move possible self-refs in result 1403 * (because we are going to reuse result!) */ 1404 Move_Pword(result, pw+1); 1405 Next_Token(pd); 1406 context_flags &= ~(SUBSCRIPTABLE|FZINC_SUBSCRIPTABLE|ZINC_SUBSCRIPTABLE); 1407 /*return _read_after_term(pd, context_prec, context_flags, oprec, result);*/ 1408 *result = term; lterm_prec = oprec; 1409 break; /* tail recursion */ 1410 } 1411 return context_flags & PREBINFIRST ? PSUCCEED : POSTINF; 1412 1413 1414 case COMMA: 1415 if ((context_flags & COMMA_TERMINATES)) 1416 { 1417 return PSUCCEED; 1418 } 1419 did0 = d_comma0_; 1420 goto _treat_like_atom_; /* (did0) */ 1421 1422 1423 case BAR: 1424 if ((context_flags & BAR_TERMINATES)) 1425 { 1426 return PSUCCEED; 1427 } 1428 /* Prolog compatibility: an (unquoted) bar in infix position is 1429 * treated as if it were a semicolon (for Cprolog/Quintus). 1430 */ 1431 did0 = pd->sd->options & BAR_IS_SEMICOLON ? d_.semi0 : d_bar0_; 1432 goto _treat_like_atom_; /* (did0) */ 1433 1434 1435 case SOLO: 1436 if (IsChar(pd, '[') && !(pd->sd->options & NO_ARRAY_SUBSCRIPTS) 1437 && ((context_flags & SUBSCRIPTABLE) || 1438 (context_flags & FZINC_SUBSCRIPTABLE) && (pd->sd->options & ATOM_SUBSCRIPTS) || 1439 (context_flags & ZINC_SUBSCRIPTABLE) && (pd->sd->options & GENERAL_SUBSCRIPTS))) 1440 { 1441 /* translate Term[Args] into subscript(Term, [Args]) */ 1442 pword *pw; 1443 Build_Struct(&term, pw, d_.subscript, pd->token.pos); 1444 Move_Pword(result, pw+1); 1445 Next_Token(pd); 1446 status = _read_sequence_until(pd, &pw[2], ']'); 1447 Return_If_Error(status); 1448 context_flags &= ~(SUBSCRIPTABLE|FZINC_SUBSCRIPTABLE); 1449 /*return _read_after_term(pd, context_prec, context_flags, 0, result);*/ 1450 *result = term; lterm_prec = 0; 1451 break; /* tail recursion */ 1452 } 1453 else if (IsChar(pd, '{') && !(pd->sd->options & NO_ATTRIBUTES) 1454 && (context_flags & ATTRIBUTABLE)) 1455 { 1456 /* Attribute follows */ 1457 pword *pw; 1458 Build_Struct(&term, pw, d_.with_attributes2, pd->token.pos); 1459 Move_Pword(result, pw+1); 1460 Next_Token(pd); 1461 status = _read_sequence_until(pd, &pw[2], '}'); 1462 Return_If_Error(status); 1463 /*return _read_after_term(pd, context_prec, context_flags, 0, result);*/ 1464 *result = term; lterm_prec = 0; 1465 break; /* tail recursion */ 1466 } 1467 /* fall through */ 1468 case SPACE_SOLO: 1469 if (IsChar(pd, '[')) { 1470 source_pos_t pos = pd->token.pos; 1471 Lookahead_Next_Token(pd); 1472 if (IsToken(pd, CLOSING_SOLO, ']')) 1473 { 1474 did0 = d_.nil; /* the atom or functor '[]' */ 1475 Merge_Source_Pos(pos, pd->token.pos, pd->token.pos); 1476 Make_Ident_Token(pd, "[]", 2); 1477 goto _treat_like_atom_; /* (pd,did0) */ 1478 } 1479 Prev_Token(pd); 1480 } else if (IsChar(pd, '{')) { 1481 source_pos_t pos = pd->token.pos; 1482 Lookahead_Next_Token(pd); 1483 if (IsToken(pd, CLOSING_SOLO, '}')) 1484 { 1485 did0 = d_.nilcurbr; /* the atom or functor '{}' */ 1486 Merge_Source_Pos(pos, pd->token.pos, pd->token.pos); 1487 Make_Ident_Token(pd, "{}", 2); 1488 goto _treat_like_atom_; /* (pd,did0) */ 1489 } 1490 Prev_Token(pd); 1491 } 1492 return context_flags & PREBINFIRST ? PSUCCEED : UNEXPECTED; 1493 1494 case EOI: 1495 case EOCL: 1496 case CLOSING_SOLO: 1497 return PSUCCEED; 1498 1499 default: 1500 return LexError(pd->token.class) ? pd->token.class : context_flags & PREBINFIRST ? PSUCCEED : UNEXPECTED; 1501 } 1502 } 1503} 1504 1505 1506/* 1507 * The toplevel parser procedure. It reads one term from the given stream 1508 * and makes sure it is properly terminated. 1509 */ 1510 1511int 1512ec_read_term( 1513 stream_id nst, /* the stream to read from */ 1514 int options, /* options (VARNAMES_PLEASE etc) */ 1515 pword *result, /* where to store the parsed term */ 1516 pword *varlist, /* where to store the var list (or NULL) */ 1517 int *has_macro, /* flag that the term may contain (clause or 1518 * goal) macros that need to be expanded */ 1519 value vm, type tm /* context module */ 1520 ) 1521{ 1522 int status; 1523 parse_desc *pd; 1524 pword *old_tg = TG; 1525 pword *pw; 1526 1527 if (StreamMode(nst) & REPROMPT_ONLY) 1528 StreamMode(nst) |= DONT_PROMPT; 1529 1530 pd = _alloc_parse_env(options, nst, vm.did, tm); 1531 pd->var_list_tail = varlist; 1532 1533 Next_Token(pd); 1534 switch(pd->token.class) 1535 { 1536 case EOI: 1537 if (StreamMode(pd->nst) & MEOF) 1538 { 1539 status = IsSoftEofStream(pd->nst) ? PEOF : READ_PAST_EOF; 1540 goto _return_error_; 1541 } 1542 StreamMode(pd->nst) |= MEOF; 1543 status = PEOF; 1544 goto _return_error_; 1545 1546 default: 1547 status = _read_next_term(pd, 1200, 0, result); 1548 if (status != PSUCCEED) 1549 goto _return_error_; 1550 switch(pd->token.class) 1551 { 1552 case EOI: 1553 if (pd->sd->options & ISO_RESTRICTIONS) 1554 { 1555 status = ENDOFFILE; 1556 goto _return_error_; 1557 } 1558 break; 1559 case EOCL: 1560 break; 1561 default: 1562 status = UNEXPECTED; 1563 goto _return_error_; 1564 } 1565 break; 1566 } 1567 1568 if (pd->var_list_tail) 1569 { 1570 Make_Nil(pd->var_list_tail); 1571 } 1572 1573 /* expand (read) macros if there were any (and expansion is not disabled) */ 1574 if (pd->macro && (GlobalFlags & MACROEXP) && !(StreamMode(pd->nst) & SNOMACROEXP) 1575 && !(options & LAYOUT_PLEASE)) 1576 { 1577 pw = result; 1578 Dereference_(pw); 1579 if (!(IsRef(pw->tag) && pw == result)) 1580 { 1581 pw = TG; 1582 Push_Struct_Frame(in_dict("expand_macros_",3)); 1583 pw[1] = *result; 1584 Make_Var(&pw[2]); 1585 pw[3].val.all = vm.all; 1586 pw[3].tag.all = tm.all; 1587 status = do_trafo(pw); 1588 Return_If_Error(status); 1589 *result = pw[2]; 1590 } 1591 } 1592 if (has_macro) 1593 *has_macro = pd->macro; 1594 1595 return PSUCCEED; 1596 1597_return_error_: 1598 TG = old_tg; /* pop (possibly incomplete) constructed term */ 1599 return status; 1600} 1601 1602 1603 1604/*********************** THE PARSER RELATED BUILTINS ********************/ 1605 1606void 1607read_init(int flags) 1608{ 1609 1610 d_comma0_ = in_dict(",", 0); 1611 d_bar0_ = in_dict("|", 0); 1612 d_annotated_term_ = in_dict("annotated_term", TERM_ARITY); 1613 d_anonymous_ = in_dict("anonymous", 0); 1614 no_pos_.file = d_.empty; 1615 1616 if (!(flags & INIT_SHARED)) 1617 return; 1618 1619 exported_built_in(in_dict("read_", 2), p_read2, B_UNSAFE|U_FRESH) 1620 -> mode = BoundArg(1, NONVAR); 1621 exported_built_in(in_dict("read_", 3), p_read3, B_UNSAFE|U_FRESH) 1622 -> mode = BoundArg(2, NONVAR); 1623 exported_built_in(in_dict("readvar", 4), p_readvar, B_UNSAFE|U_FRESH) 1624 -> mode = BoundArg(2, NONVAR) | BoundArg(3, NONVAR); 1625 exported_built_in(in_dict("read_annotated_raw", 4), p_read_annotated_raw, B_UNSAFE|U_FRESH) 1626 -> mode = BoundArg(2, NONVAR) | BoundArg(3, CONSTANT); 1627} 1628 1629 1630 1631/* 1632 * read_(Term, Module) 1633 * reads a term from the current input 1634*/ 1635static int 1636p_read2(value v, type t, value vm, type tm) 1637{ 1638 int status; 1639 1640 Check_Module(tm, vm); 1641 status = _pread3(v, t, current_input_, vm, tm); 1642 if (status < 0) 1643 { 1644 Bip_Error(status) 1645 } 1646 return (status); 1647} 1648 1649/* 1650 * read_(Stream, Term, Module) 1651 * reads a termfrom the current input and unifies it with its argument. 1652 * The unification/dereferencing is done by the emulator on Request_unify 1653*/ 1654int 1655p_read3(value vs, type ts, value v, type t, value vm, type tm) 1656{ 1657 int status; 1658 stream_id nst = get_stream_id(vs, ts, SREAD, &status); 1659 1660 Check_Module(tm, vm); 1661 if (nst == NO_STREAM) 1662 { 1663 Bip_Error(status) 1664 } 1665 if(!(IsReadStream(nst))) 1666 { 1667 Bip_Error(STREAM_MODE); 1668 } 1669 1670 status = _pread3(v, t, nst, vm, tm); 1671 if (status < 0) 1672 { 1673 Bip_Error(status) 1674 } 1675 return (status); 1676} 1677 1678 1679static int 1680_pread3(value v, type t, stream_id nst, value vm, type tm) 1681{ 1682 int status; 1683 pword *pw; 1684 pword result; /* be careful not to pass this pword to Prolog, 1685 * e.g. when calling a macro transformation 1686 * (cf. bug #560), or when returning (see below). 1687 */ 1688 status = ec_read_term(nst, 1689 (GlobalFlags & VARIABLE_NAMES ? VARNAMES_PLEASE : 0), 1690 &result, 0, 0, vm, tm); 1691 1692 if (status != PSUCCEED) 1693 return (status); 1694 1695 pw = &result; 1696 Dereference_(pw); 1697 if (IsRef(pw->tag) && pw == &result) 1698 { 1699 Succeed_; /* a free variable */ 1700 } 1701 Return_Unify_Pw(v, t, pw->val, pw->tag) 1702} 1703 1704 1705/* 1706 * readvar(Stream, Term, ListVar, Module) 1707 * reads a term from the current input, unifies with with the 1708 * first argument, unifies the second argument with the list of doublets 1709 * [namevar|adrvar]. 1710*/ 1711static int 1712p_readvar(value vs, type ts, value v, type t, value vv, type tv, value vm, type tm) 1713{ 1714 pword *pw; 1715 pword result; 1716 pword vars; 1717 int status; 1718 stream_id nst = get_stream_id(vs, ts, SREAD, &status); 1719 Prepare_Requests 1720 1721 if (nst == NO_STREAM) 1722 { 1723 Bip_Error(status) 1724 } 1725 1726 Check_Ref(tv); 1727 Check_Module(tm, vm); 1728 if(!(IsReadStream(nst))) 1729 { 1730 Bip_Error(STREAM_MODE); 1731 } 1732 1733 status = ec_read_term(nst, 1734 (GlobalFlags & VARIABLE_NAMES ? VARNAMES_PLEASE : 0), 1735 &result, &vars, 0, vm, tm); 1736 1737 if (status != PSUCCEED) 1738 { 1739 Bip_Error(status); 1740 } 1741 1742 Request_Unify_Pw(vv, tv, vars.val, vars.tag); 1743 1744 pw = &result; 1745 Dereference_(pw); 1746 if (!(IsRef(pw->tag) && pw == &result)) 1747 { 1748 Request_Unify_Pw(v, t, pw->val, pw->tag); 1749 } 1750 Return_Unify; 1751} 1752 1753 1754static int 1755p_read_annotated_raw(value vs, type ts, value v, type t, value vf, type tf, value vm, type tm) 1756{ 1757 pword *pw; 1758 pword result; 1759 int status; 1760 int has_macro = 0; 1761 stream_id nst = get_stream_id(vs, ts, SREAD, &status); 1762 Prepare_Requests 1763 1764 if (nst == NO_STREAM) 1765 { 1766 Bip_Error(status) 1767 } 1768 1769 Check_Module(tm, vm); 1770 if(!(IsReadStream(nst))) 1771 { 1772 Bip_Error(STREAM_MODE); 1773 } 1774 1775 status = ec_read_term(nst, LAYOUT_PLEASE | 1776 (GlobalFlags & VARIABLE_NAMES ? VARNAMES_PLEASE : 0), 1777 &result, 0, &has_macro, vm, tm); 1778 1779 if (status != PSUCCEED) 1780 { 1781 Bip_Error(status); 1782 } 1783 1784 /* return flag indicating request for macro expansion */ 1785 if (!(GlobalFlags & MACROEXP) || (StreamMode(nst) & SNOMACROEXP)) 1786 has_macro = 0; 1787 Request_Unify_Integer(vf, tf, has_macro) 1788 1789 pw = &result; 1790 Dereference_(pw); 1791 if (!(IsRef(pw->tag) && pw == &result)) 1792 { 1793 Request_Unify_Pw(v, t, pw->val, pw->tag) 1794 } 1795 Return_Unify 1796} 1797 1798 1799 1800/*********************** PREPARING A PARSER CALL ************************/ 1801 1802/* 1803 * Allocate and initialise a parsing environment 1804 * 1805 * contents of the parsing environment: see type declaration 1806 * 1807 * Remaining Problem: when a read is aborted via an interrupt, the parsing 1808 * environment is not freed. 1809 */ 1810 1811static parse_desc * 1812_alloc_parse_env(int options, stream_id nst, dident module, type mod_tag) 1813{ 1814 register parse_desc *pd = (parse_desc *) PARSENV; 1815 1816 if (pd) /* reinit the existing parser environment */ 1817 { 1818 if (NUMBER_VAR != pd->var_table_size) /* table size changed */ 1819 { 1820 hp_free_size((generic_ptr) pd->var_table, pd->var_table_size*sizeof(vword)); 1821 pd->var_table_size = NUMBER_VAR; 1822 pd->var_table = (vword *) hp_alloc_size((int)NUMBER_VAR * sizeof(vword)); 1823 pd->counter = 0; 1824 } 1825 Temp_Reset(pd->string_store); 1826 } 1827 else /* allocate a new parsing environment */ 1828 { 1829 pd = (parse_desc *) hp_alloc_size(sizeof(parse_desc)); 1830 pd->var_table_size = NUMBER_VAR; 1831 pd->var_table = (vword *) hp_alloc_size((int)NUMBER_VAR * sizeof(vword)); 1832 pd->counter = 0; 1833 Temp_Create(pd->string_store, 1024); 1834 PARSENV = (void_ptr) pd; /* store it globally */ 1835 } 1836 1837 pd->nst = nst; 1838 pd->sd = ModuleSyntax(module); 1839 pd->module = module; 1840 pd->module_tag = mod_tag; 1841 pd->token.class = pd->prev_token.class = pd->next_token.class = NO_TOKEN; 1842 pd->macro = 0; 1843 pd->options = options; 1844 pd->max_arg_prec = (pd->sd->options & LIMIT_ARG_PRECEDENCE) ? 999 : 1200; 1845 1846 if (pd->counter++ == 0) /* (re)init the hash table */ 1847 { 1848 register vword *v = pd->var_table; 1849 register vword *last = v + NUMBER_VAR; 1850 while (v < last) 1851 (v++)->lock = 0; 1852 } 1853 return pd; 1854} 1855 1856 1857int 1858destroy_parser_env(void) /* called when exiting emulators */ 1859{ 1860 register parse_desc *pd = (parse_desc *) PARSENV; 1861 1862 if (pd) /* deallocate the parsing environment */ 1863 { 1864 hp_free_size((generic_ptr) pd->var_table, pd->var_table_size*sizeof(vword)); 1865 Temp_Destroy(pd->string_store); 1866 hp_free_size((generic_ptr) pd, sizeof(parse_desc)); 1867 PARSENV = (void_ptr) 0; 1868 } 1869 return 0; 1870} 1871 1872 1873/************************ MACRO TRANSFORMATION SUPPORT **************************/ 1874 1875/* 1876 * Run transformation goal, catch aborts, 1877 * turn numeric exit_block tags into negative error return code. 1878 * Returns PSUCCEED, PFAIL or error code 1879*/ 1880int 1881do_trafo(pword *term) /* goal to call */ 1882{ 1883 pword saved_a1; 1884 int res; 1885 value v1; 1886 value v2; 1887 type t2; 1888 1889 v1.ptr = term; 1890 v2.did = d_.kernel_sepia; 1891 t2.kernel = ModuleTag(d_.kernel_sepia); 1892 /* hack to preserve A[1] in case it gets overwritten by exit_block/1 */ 1893 saved_a1 = A[1]; 1894 res = sub_emulc_noexit(v1, tcomp, v2, t2); 1895 if (res == PTHROW) 1896 { 1897 pword ball = A[1]; 1898 A[1] = saved_a1; 1899 if (IsInteger(ball.tag) && ball.val.nint > 0) 1900 res = (int) -ball.val.nint; 1901 else 1902 res = TRANS_ERROR; 1903 } 1904 return res; 1905} 1906 1907/* 1908 * Create a transformation goal for the functor tr_did. Return 0 if 1909 * no transformation possible/necessary. 1910 */ 1911pword * 1912trafo_term(dident tr_did, /* the functor of the term to transform */ 1913 int flags, /* conditions for the macro */ 1914 dident mv, /* current module */ 1915 type mt, /* its tag */ 1916 int *tr_flags) /* flags of the macro */ 1917{ 1918 pword *pw; 1919 pword *prop; 1920 macro_desc *md; 1921 int err; 1922 int propid; 1923 1924 /* for input goal and clause macros we don't build the goal */ 1925 if ((flags & TR_GOAL) && !(flags & TR_WRITE)) 1926 { 1927 *tr_flags = TR_GOAL; 1928 return 0; 1929 } 1930 1931 if (flags & TR_CLAUSE) 1932 propid = CLAUSE_TRANS_PROP; 1933 else if (flags & TR_GOAL) 1934 propid = GOAL_TRANS_PROP; 1935 else 1936 propid = TRANS_PROP; 1937 if (flags & TR_WRITE) 1938 propid++; 1939 prop = get_modular_property(tr_did, propid, mv, mt, VISIBLE_PROP, &err); 1940 if (!prop) { 1941 *tr_flags = 0; 1942 return 0; 1943 } 1944 1945 md = (macro_desc *) prop->val.ptr; 1946 *tr_flags = md->flags; 1947 /* check if the type is ok */ 1948 if ((md->flags & flags) != (md->flags & TR_TYPE)) 1949 return 0; 1950 1951 /* create a goal of the form: 1952 * trans_term( <trans>(In,Out{,AnnIn,AnnOut}{,CurModule}), TrModule ) or 1953 * AnnIn,AnnOut are always uninstantiated here 1954 */ 1955 pw = Gbl_Tg; 1956 Gbl_Tg += DidArity(md->trans_function) + 4; 1957 Check_Gc; 1958 pw->tag.all = TDICT; 1959 pw->val.did = d_.trans_term; 1960 (pw+1)->tag.kernel = TCOMP; 1961 (pw+1)->val.ptr = pw+3; 1962 (pw+2)->tag.kernel = ModuleTag(tr_did); 1963 (pw+2)->val.did = md->module; 1964 (pw+3)->tag.kernel = TDICT; 1965 (pw+3)->val.did = md->trans_function; 1966 1967 (pw+5)->tag.kernel = TREF; 1968 (pw+5)->val.ptr = (pw+5); 1969 switch (DidArity(md->trans_function)) 1970 { 1971 case 2: /* <trans>(In, Out) */ 1972 break; 1973 case 3: /* <trans>(In,Out,CurModule) */ 1974 (pw+6)->tag.all = mt.all; 1975 (pw+6)->val.did = mv; 1976 break; 1977 case 5: /* <trans>(In,Out,AnnIn,AnnOut,CurModule) */ 1978 (pw+8)->tag.all = mt.all; 1979 (pw+8)->val.did = mv; 1980 /* falls through */ 1981 case 4: /* <trans>(In,Out,AnnIn,AnnOut) */ 1982 (pw+6)->tag.kernel = TREF; 1983 (pw+6)->val.ptr = (pw+6); 1984 (pw+7)->tag.kernel = TREF; 1985 (pw+7)->val.ptr = (pw+7); 1986 break; 1987 default: 1988 /* incorrect arity for <trans> */ 1989 Gbl_Tg = Gbl_Tg - DidArity(md->trans_function) - 4; 1990 return 0; 1991 } 1992 1993 return pw; 1994} 1995 1996 1997/* 1998 * Transform the metaterm attribute into the internal form. 1999 */ 2000pword * 2001transf_meta_in(pword *pw, dident mod, int *err) 2002{ 2003 int arity = p_meta_arity_->val.nint; 2004 int i; 2005 register pword *r; 2006 2007 r = TG; 2008 TG += 1 + arity; 2009 Check_Gc; 2010 r[0].val.did = in_dict("meta", arity); 2011 r[0].tag.kernel = TDICT; 2012 for (i = 1; i <= arity; i++) { 2013 r[i].val.ptr = r + i; 2014 r[i].tag.kernel = TREF; 2015 } 2016 i = meta_index(mod); 2017 i = _transf_attribute(pw, r, i); 2018 if (i != PSUCCEED) { 2019 *err = i; 2020 return 0; 2021 } else 2022 return r; 2023} 2024 2025static int 2026_transf_attribute(register pword *pw, pword *r, int def) 2027{ 2028 int res; 2029 pword *s; 2030 2031 Dereference_(pw); 2032 if (IsStructure(pw->tag)) 2033 { 2034 s = pw->val.ptr; 2035 if (s->val.did == d_.comma) { 2036 if ((res = _transf_attribute(s + 1, r, def)) < 0) 2037 return res; 2038 return _transf_attribute(s + 2, r, def); 2039 } else if (s->val.did == d_.colon) { 2040 pw = s + 1; 2041 Dereference_(pw); 2042 if (IsAtom(pw->tag)) { 2043 def = meta_index(pw->val.did); 2044 pw = s + 2; 2045 } else if (IsRef(pw->tag)) 2046 return INSTANTIATION_FAULT; 2047 else 2048 return TYPE_ERROR; 2049 } 2050 } 2051 if (!def) 2052 return UNDEF_ATTR; 2053 if (!(IsVar(r[def].tag) && r[def].val.ptr == r + def)) 2054 return TYPE_ERROR; 2055 r[def].val.ptr = pw->val.ptr; 2056 r[def].tag.kernel = pw->tag.kernel; 2057 return PSUCCEED; 2058} 2059 2060 2061/* 2062 * Transform the metaterm attribute into the external format. 2063 * Note that the caller has to allocate sufficient memory for 2064 * the constructed term (ATTR_IO_TERM_SIZE pwords at top). 2065 * The function returns the end of the memory actually used. 2066 */ 2067pword * 2068transf_meta_out(value val, /* attribute term to transform */ 2069 type tag, 2070 pword *top, /* where to build the the resulting term */ 2071 dident mod, /* context module (or D_UNKNOWN) */ 2072 pword *presult) /* where to store the result */ 2073{ 2074 /* by default, return the untransformed term */ 2075 presult->val.all = val.all; 2076 presult->tag.all = tag.all; 2077 2078 /* transform only if we have a proper meta/N structure */ 2079 if (IsStructure(tag) && check_did(val.ptr->val.did,0) == d_.meta0) 2080 { 2081 int i, first = 1; 2082 2083 for (i = DidArity(val.ptr->val.did); i > 0; --i) 2084 { 2085 dident wd = meta_name(i); 2086 2087 if (wd != D_UNKNOWN) 2088 { 2089 pword attr; 2090 if (wd == mod) { 2091 attr = val.ptr[i]; /* don't module-qualify */ 2092 } else { 2093 pword *pw = top; /* construct name:AttrI */ 2094 top += 3; 2095 Make_Struct(&attr, pw); 2096 Make_Atom(&pw[0], d_.colon); /* should be Make_Functor() */ 2097 Make_Atom(&pw[1], wd); 2098 pw[2] = val.ptr[i]; 2099 } 2100 if (first) { 2101 *presult = attr; /* the only attribute so far*/ 2102 first = 0; 2103 } else { 2104 pword *pw = top; /* construct QAttrI,Others */ 2105 top += 3; 2106 Make_Atom(&pw[0], d_.comma); /* should be Make_Functor() */ 2107 pw[1] = attr; 2108 pw[2] = *presult; 2109 Make_Struct(presult, pw); 2110 } 2111 } 2112 } 2113 } 2114 return top; 2115} 2116 2117 2118/********************* VARIABLE NAME HASHING *******************************/ 2119 2120static uword 2121hashfunction(char *id) 2122{ 2123 register uword hash; 2124 register int length, shift, ival; 2125 register char *str; 2126 2127 length = 0; 2128 hash = 0; 2129 for (str = id; *str; str++) 2130 { 2131 ival = *str & 0x000000FF; /* get rid of sign extension */ 2132 shift = length + 4 * (length & 3); /* add 0, 4, 8 or 12 */ 2133 shift &= 0x0000000F; /* keep important bits */ 2134 hash ^= (ival << (shift) | ival >> (16 - shift)); 2135 hash &= 0x0000FFFF; 2136 length++; 2137 } 2138 2139 return(hash); 2140} 2141 2142static vword * 2143_alloc_vword(register parse_desc *pd) 2144{ 2145 Temp_Align(pd->string_store, sizeof(int *)); 2146 return (vword *) TempAlloc(pd->string_store, sizeof(vword)); 2147} 2148 2149static vword * 2150_var_table_entry(parse_desc *pd, char *varname, word length) 2151{ 2152 vword *p, *q; 2153 p = &pd->var_table[hashfunction(varname) % NUMBER_VAR]; 2154 2155 if (p->lock == pd->counter) /* there is a table entry */ 2156 { 2157 while (p && strcmp(p->str, varname)) { /* search the chain */ 2158 q = p; 2159 p = p->next; 2160 } 2161 if (p) 2162 { 2163 return p; 2164 } 2165 q->next = p = _alloc_vword(pd); 2166 } 2167 2168 /* it is a new variable, copy the string and make a table entry */ 2169 p->str = TempAlloc(pd->string_store, length+1); 2170 Copy_Bytes(p->str, varname, length+1); 2171 p->lock = pd->counter; 2172 p->next = 0; 2173 p->ptr = 0; 2174 return p; 2175} 2176