1;;; ebnf-bnf.el --- parser for EBNF 2 3;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 4;; Free Software Foundation, Inc. 5 6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> 7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> 8;; Keywords: wp, ebnf, PostScript 9;; Version: 1.9 10 11;; This file is part of GNU Emacs. 12 13;; GNU Emacs is free software; you can redistribute it and/or modify 14;; it under the terms of the GNU General Public License as published by 15;; the Free Software Foundation; either version 2, or (at your option) 16;; any later version. 17 18;; GNU Emacs is distributed in the hope that it will be useful, 19;; but WITHOUT ANY WARRANTY; without even the implied warranty of 20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21;; GNU General Public License for more details. 22 23;; You should have received a copy of the GNU General Public License 24;; along with GNU Emacs; see the file COPYING. If not, write to the 25;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 26;; Boston, MA 02110-1301, USA. 27 28;;; Commentary: 29 30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 31;; 32;; 33;; This is part of ebnf2ps package. 34;; 35;; This package defines a parser for EBNF. 36;; 37;; See ebnf2ps.el for documentation. 38;; 39;; 40;; EBNF Syntax 41;; ----------- 42;; 43;; The current EBNF that ebnf2ps accepts has the following constructions: 44;; 45;; ; comment (until end of line) 46;; A non-terminal 47;; "C" terminal 48;; ?C? special 49;; $A default non-terminal 50;; $"C" default terminal 51;; $?C? default special 52;; A = B. production (A is the header and B the body) 53;; C D sequence (C occurs before D) 54;; C | D alternative (C or D occurs) 55;; A - B exception (A excluding B, B without any non-terminal) 56;; n * A repetition (A repeats at least n (integer) times) 57;; n * n A repetition (A repeats exactly n (integer) times) 58;; n * m A repetition (A repeats at least n (integer) and at most 59;; m (integer) times) 60;; (C) group (expression C is grouped together) 61;; [C] optional (C may or not occurs) 62;; C+ one or more occurrences of C 63;; {C}+ one or more occurrences of C 64;; {C}* zero or more occurrences of C 65;; {C} zero or more occurrences of C 66;; C / D equivalent to: C {D C}* 67;; {C || D}+ equivalent to: C {D C}* 68;; {C || D}* equivalent to: [C {D C}*] 69;; {C || D} equivalent to: [C {D C}*] 70;; 71;; The EBNF syntax written using the notation above is: 72;; 73;; EBNF = {production}+. 74;; 75;; production = non_terminal "=" body ".". ;; production 76;; 77;; body = {sequence || "|"}*. ;; alternative 78;; 79;; sequence = {exception}*. ;; sequence 80;; 81;; exception = repeat [ "-" repeat]. ;; exception 82;; 83;; repeat = [ integer "*" [ integer ]] term. ;; repetition 84;; 85;; term = factor 86;; | [factor] "+" ;; one-or-more 87;; | [factor] "/" [factor] ;; one-or-more 88;; . 89;; 90;; factor = [ "$" ] "\"" terminal "\"" ;; terminal 91;; | [ "$" ] non_terminal ;; non-terminal 92;; | [ "$" ] "?" special "?" ;; special 93;; | "(" body ")" ;; group 94;; | "[" body "]" ;; zero-or-one 95;; | "{" body [ "||" body ] "}+" ;; one-or-more 96;; | "{" body [ "||" body ] "}*" ;; zero-or-more 97;; | "{" body [ "||" body ] "}" ;; zero-or-more 98;; . 99;; 100;; non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+". 101;; ;; that is, a valid non_terminal accepts decimal digits, letters (upper 102;; ;; and lower), 8-bit accentuated characters, 103;; ;; "!", "#", "%", "&", "'", "*", "+", ",", ":", 104;; ;; "<", ">", "@", "\", "^", "_", "`" and "~". 105;; 106;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+". 107;; ;; that is, a valid terminal accepts any printable character (including 108;; ;; 8-bit accentuated characters) except `"', as `"' is used to delimit a 109;; ;; terminal. Also, accepts escaped characters, that is, a character 110;; ;; pair starting with `\' followed by a printable character, for 111;; ;; example: \", \\. 112;; 113;; special = "[^?\\000-\\010\\012-\\037\\177-\\237]*". 114;; ;; that is, a valid special accepts any printable character (including 115;; ;; 8-bit accentuated characters) and tabs except `?', as `?' is used to 116;; ;; delimit a special. 117;; 118;; integer = "[0-9]+". 119;; ;; that is, an integer is a sequence of one or more decimal digits. 120;; 121;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n". 122;; ;; that is, a comment starts with the character `;' and terminates at end 123;; ;; of line. Also, it only accepts printable characters (including 8-bit 124;; ;; accentuated characters) and tabs. 125;; 126;; 127;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 128 129;;; Code: 130 131 132(require 'ebnf-otz) 133 134 135(defvar ebnf-bnf-lex nil 136 "Value returned by `ebnf-bnf-lex' function.") 137 138 139;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 140;; Syntactic analyzer 141 142 143;;; EBNF = {production}+. 144 145(defun ebnf-bnf-parser (start) 146 "EBNF parser." 147 (let ((total (+ (- ebnf-limit start) 1)) 148 (bias (1- start)) 149 (origin (point)) 150 prod-list token rule) 151 (goto-char start) 152 (setq token (ebnf-bnf-lex)) 153 (and (eq token 'end-of-input) 154 (error "Invalid EBNF file format")) 155 (while (not (eq token 'end-of-input)) 156 (ebnf-message-float 157 "Parsing...%s%%" 158 (/ (* (- (point) bias) 100.0) total)) 159 (setq token (ebnf-production token) 160 rule (cdr token) 161 token (car token)) 162 (or (ebnf-add-empty-rule-list rule) 163 (setq prod-list (cons rule prod-list)))) 164 (goto-char origin) 165 prod-list)) 166 167 168;;; production = non-terminal "=" body ".". 169 170(defun ebnf-production (token) 171 (let ((header ebnf-bnf-lex) 172 (action ebnf-action) 173 body) 174 (setq ebnf-action nil) 175 (or (eq token 'non-terminal) 176 (error "Invalid header production")) 177 (or (eq (ebnf-bnf-lex) 'equal) 178 (error "Invalid production: missing `='")) 179 (setq body (ebnf-body)) 180 (or (eq (car body) 'period) 181 (error "Invalid production: missing `.'")) 182 (setq body (cdr body)) 183 (ebnf-eps-add-production header) 184 (cons (ebnf-bnf-lex) 185 (ebnf-make-production header body action)))) 186 187 188;;; body = {sequence || "|"}*. 189 190(defun ebnf-body () 191 (let (body sequence) 192 (while (eq (car (setq sequence (ebnf-sequence))) 'alternative) 193 (setq sequence (cdr sequence) 194 body (cons sequence body))) 195 (ebnf-token-alternative body sequence))) 196 197 198;;; sequence = {exception}*. 199 200(defun ebnf-sequence () 201 (let ((token (ebnf-bnf-lex)) 202 seq term) 203 (while (setq term (ebnf-exception token) 204 token (car term) 205 term (cdr term)) 206 (setq seq (cons term seq))) 207 (cons token 208 (ebnf-token-sequence seq)))) 209 210 211;;; exception = repeat [ "-" repeat]. 212 213(defun ebnf-exception (token) 214 (let ((term (ebnf-repeat token))) 215 (if (not (eq (car term) 'except)) 216 ;; repeat 217 term 218 ;; repeat - repeat 219 (let ((exception (ebnf-repeat (ebnf-bnf-lex)))) 220 (ebnf-no-non-terminal (cdr exception)) 221 (ebnf-token-except (cdr term) exception))))) 222 223 224(defun ebnf-no-non-terminal (node) 225 (and (vectorp node) 226 (let ((kind (ebnf-node-kind node))) 227 (cond 228 ((eq kind 'ebnf-generate-non-terminal) 229 (error "Exception sequence should not contain a non-terminal")) 230 ((eq kind 'ebnf-generate-repeat) 231 (ebnf-no-non-terminal (ebnf-node-separator node))) 232 ((memq kind '(ebnf-generate-optional ebnf-generate-except)) 233 (ebnf-no-non-terminal (ebnf-node-list node))) 234 ((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more)) 235 (ebnf-no-non-terminal (ebnf-node-list node)) 236 (ebnf-no-non-terminal (ebnf-node-separator node))) 237 ((memq kind '(ebnf-generate-alternative ebnf-generate-sequence)) 238 (let ((seq (ebnf-node-list node))) 239 (while seq 240 (ebnf-no-non-terminal (car seq)) 241 (setq seq (cdr seq))))) 242 )))) 243 244 245;;; repeat = [ integer "*" [ integer ]] term. 246 247(defun ebnf-repeat (token) 248 (if (not (eq token 'integer)) 249 (ebnf-term token) 250 (let ((times ebnf-bnf-lex) 251 upper) 252 (or (eq (ebnf-bnf-lex) 'repeat) 253 (error "Missing `*'")) 254 (setq token (ebnf-bnf-lex)) 255 (when (eq token 'integer) 256 (setq upper ebnf-bnf-lex 257 token (ebnf-bnf-lex))) 258 (ebnf-token-repeat times (ebnf-term token) upper)))) 259 260 261;;; term = factor 262;;; | [factor] "+" ;; one-or-more 263;;; | [factor] "/" [factor] ;; one-or-more 264;;; . 265 266(defun ebnf-term (token) 267 (let ((factor (ebnf-factor token))) 268 (and factor 269 (setq token (ebnf-bnf-lex))) 270 (cond 271 ;; [factor] + 272 ((eq token 'one-or-more) 273 (cons (ebnf-bnf-lex) 274 (and factor 275 (let ((kind (ebnf-node-kind factor))) 276 (cond 277 ;; { A }+ + ==> { A }+ 278 ;; { A }* + ==> { A }* 279 ((memq kind '(ebnf-generate-zero-or-more 280 ebnf-generate-one-or-more)) 281 factor) 282 ;; [ A ] + ==> { A }* 283 ((eq kind 'ebnf-generate-optional) 284 (ebnf-make-zero-or-more (list factor))) 285 ;; A + 286 (t 287 (ebnf-make-one-or-more (list factor))) 288 ))))) 289 ;; [factor] / [factor] 290 ((eq token 'list) 291 (setq token (ebnf-bnf-lex)) 292 (let ((sep (ebnf-factor token))) 293 (and sep 294 (setq factor (or factor (ebnf-make-empty)))) 295 (cons (if sep 296 (ebnf-bnf-lex) 297 token) 298 (and factor 299 (ebnf-make-one-or-more factor sep))))) 300 ;; factor 301 (t 302 (cons token factor)) 303 ))) 304 305 306;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal 307;;; | [ "$" ] non_terminal ;; non-terminal 308;;; | [ "$" ] "?" special "?" ;; special 309;;; | "(" body ")" ;; group 310;;; | "[" body "]" ;; zero-or-one 311;;; | "{" body [ "||" body ] "}+" ;; one-or-more 312;;; | "{" body [ "||" body ] "}*" ;; zero-or-more 313;;; | "{" body [ "||" body ] "}" ;; zero-or-more 314;;; . 315 316(defun ebnf-factor (token) 317 (cond 318 ;; terminal 319 ((eq token 'terminal) 320 (ebnf-make-terminal ebnf-bnf-lex)) 321 ;; non-terminal 322 ((eq token 'non-terminal) 323 (ebnf-make-non-terminal ebnf-bnf-lex)) 324 ;; special 325 ((eq token 'special) 326 (ebnf-make-special ebnf-bnf-lex)) 327 ;; group 328 ((eq token 'begin-group) 329 (let ((body (ebnf-body))) 330 (or (eq (car body) 'end-group) 331 (error "Missing `)'")) 332 (cdr body))) 333 ;; optional 334 ((eq token 'begin-optional) 335 (let ((body (ebnf-body))) 336 (or (eq (car body) 'end-optional) 337 (error "Missing `]'")) 338 (ebnf-token-optional (cdr body)))) 339 ;; list 340 ((eq token 'begin-list) 341 (let* ((body (ebnf-body)) 342 (token (car body)) 343 (list-part (cdr body)) 344 sep-part) 345 (and (eq token 'list-separator) 346 ;; { A || B } 347 (setq body (ebnf-body) ; get separator 348 token (car body) 349 sep-part (cdr body))) 350 (cond 351 ;; { A }+ 352 ((eq token 'end-one-or-more) 353 (ebnf-make-one-or-more list-part sep-part)) 354 ;; { A }* 355 ((eq token 'end-zero-or-more) 356 (ebnf-make-zero-or-more list-part sep-part)) 357 (t 358 (error "Missing `}+', `}*' or `}'")) 359 ))) 360 ;; no term 361 (t 362 nil) 363 )) 364 365 366;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 367;; Lexical analyzer 368 369 370(defconst ebnf-bnf-token-table (make-vector 256 'error) 371 "Vector used to map characters to a lexical token.") 372 373 374(defun ebnf-bnf-initialize () 375 "Initialize EBNF token table." 376 ;; control character & control 8-bit character are set to `error' 377 (let ((char ?\040)) 378 ;; printable character: 379 (while (< char ?\060) 380 (aset ebnf-bnf-token-table char 'non-terminal) 381 (setq char (1+ char))) 382 ;; digits: 383 (while (< char ?\072) 384 (aset ebnf-bnf-token-table char 'integer) 385 (setq char (1+ char))) 386 ;; printable character: 387 (while (< char ?\177) 388 (aset ebnf-bnf-token-table char 'non-terminal) 389 (setq char (1+ char))) 390 ;; European 8-bit accentuated characters: 391 (setq char ?\240) 392 (while (< char ?\400) 393 (aset ebnf-bnf-token-table char 'non-terminal) 394 (setq char (1+ char))) 395 ;; Override space characters: 396 (aset ebnf-bnf-token-table ?\013 'space) ; [VT] vertical tab 397 (aset ebnf-bnf-token-table ?\n 'space) ; [NL] linefeed 398 (aset ebnf-bnf-token-table ?\r 'space) ; [CR] carriage return 399 (aset ebnf-bnf-token-table ?\t 'space) ; [HT] horizontal tab 400 (aset ebnf-bnf-token-table ?\ 'space) ; [SP] space 401 ;; Override form feed character: 402 (aset ebnf-bnf-token-table ?\f 'form-feed) ; [FF] form feed 403 ;; Override other lexical characters: 404 (aset ebnf-bnf-token-table ?\" 'terminal) 405 (aset ebnf-bnf-token-table ?\? 'special) 406 (aset ebnf-bnf-token-table ?\( 'begin-group) 407 (aset ebnf-bnf-token-table ?\) 'end-group) 408 (aset ebnf-bnf-token-table ?* 'repeat) 409 (aset ebnf-bnf-token-table ?- 'except) 410 (aset ebnf-bnf-token-table ?= 'equal) 411 (aset ebnf-bnf-token-table ?\[ 'begin-optional) 412 (aset ebnf-bnf-token-table ?\] 'end-optional) 413 (aset ebnf-bnf-token-table ?\{ 'begin-list) 414 (aset ebnf-bnf-token-table ?| 'alternative) 415 (aset ebnf-bnf-token-table ?\} 'end-list) 416 (aset ebnf-bnf-token-table ?/ 'list) 417 (aset ebnf-bnf-token-table ?+ 'one-or-more) 418 (aset ebnf-bnf-token-table ?$ 'default) 419 ;; Override comment character: 420 (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment) 421 ;; Override end of production character: 422 (aset ebnf-bnf-token-table ebnf-lex-eop-char 'period))) 423 424 425;; replace the range "\240-\377" (see `ebnf-range-regexp'). 426(defconst ebnf-bnf-non-terminal-chars 427 (ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377)) 428 429 430(defun ebnf-bnf-lex () 431 "Lexical analyzer for EBNF. 432 433Return a lexical token. 434 435See documentation for variable `ebnf-bnf-lex'." 436 (if (>= (point) ebnf-limit) 437 'end-of-input 438 (let (token) 439 ;; skip spaces and comments 440 (while (if (> (following-char) 255) 441 (progn 442 (setq token 'error) 443 nil) 444 (setq token (aref ebnf-bnf-token-table (following-char))) 445 (cond 446 ((eq token 'space) 447 (skip-chars-forward " \013\n\r\t" ebnf-limit) 448 (< (point) ebnf-limit)) 449 ((eq token 'comment) 450 (ebnf-bnf-skip-comment)) 451 ((eq token 'form-feed) 452 (forward-char) 453 (setq ebnf-action 'form-feed)) 454 (t nil) 455 ))) 456 (setq ebnf-default-p nil) 457 (cond 458 ;; end of input 459 ((>= (point) ebnf-limit) 460 'end-of-input) 461 ;; error 462 ((eq token 'error) 463 (error "Invalid character")) 464 ;; default 465 ((eq token 'default) 466 (forward-char) 467 (if (memq (aref ebnf-bnf-token-table (following-char)) 468 '(terminal non-terminal special)) 469 (prog1 470 (ebnf-bnf-lex) 471 (setq ebnf-default-p t)) 472 (error "Invalid `default' element"))) 473 ;; integer 474 ((eq token 'integer) 475 (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9")) 476 'integer) 477 ;; special: ?special? 478 ((eq token 'special) 479 (setq ebnf-bnf-lex (concat (and ebnf-special-show-delimiter "?") 480 (ebnf-string " ->@-~" ?\? "special") 481 (and ebnf-special-show-delimiter "?"))) 482 'special) 483 ;; terminal: "string" 484 ((eq token 'terminal) 485 (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string))) 486 'terminal) 487 ;; non-terminal or terminal 488 ((eq token 'non-terminal) 489 (setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars)) 490 (let ((case-fold-search ebnf-case-fold-search) 491 match) 492 (if (and ebnf-terminal-regexp 493 (setq match (string-match ebnf-terminal-regexp 494 ebnf-bnf-lex)) 495 (zerop match) 496 (= (match-end 0) (length ebnf-bnf-lex))) 497 'terminal 498 'non-terminal))) 499 ;; end of list: }+, }*, } 500 ((eq token 'end-list) 501 (forward-char) 502 (cond 503 ((= (following-char) ?+) 504 (forward-char) 505 'end-one-or-more) 506 ((= (following-char) ?*) 507 (forward-char) 508 'end-zero-or-more) 509 (t 510 'end-zero-or-more) 511 )) 512 ;; alternative: |, || 513 ((eq token 'alternative) 514 (forward-char) 515 (if (/= (following-char) ?|) 516 'alternative 517 (forward-char) 518 'list-separator)) 519 ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, * 520 (t 521 (forward-char) 522 token) 523 )))) 524 525 526;; replace the range "\177-\237" (see `ebnf-range-regexp'). 527(defconst ebnf-bnf-comment-chars 528 (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237)) 529 530 531(defun ebnf-bnf-skip-comment () 532 (forward-char) 533 (cond 534 ;; open EPS file 535 ((and ebnf-eps-executing (= (following-char) ?\[)) 536 (ebnf-eps-add-context (ebnf-bnf-eps-filename))) 537 ;; close EPS file 538 ((and ebnf-eps-executing (= (following-char) ?\])) 539 (ebnf-eps-remove-context (ebnf-bnf-eps-filename))) 540 ;; any other action in comment 541 (t 542 (setq ebnf-action (aref ebnf-comment-table (following-char))) 543 (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit)) 544 ) 545 ;; check for a valid end of comment 546 (cond ((>= (point) ebnf-limit) 547 nil) 548 ((= (following-char) ?\n) 549 (forward-char) 550 t) 551 (t 552 (error "Invalid character")) 553 )) 554 555 556(defun ebnf-bnf-eps-filename () 557 (forward-char) 558 (ebnf-buffer-substring ebnf-bnf-comment-chars)) 559 560 561(defun ebnf-unescape-string (str) 562 (let* ((len (length str)) 563 (size (1- len)) 564 (istr 0) 565 (n-esc 0)) 566 ;; count number of escapes 567 (while (< istr size) 568 (setq istr (+ istr 569 (if (= (aref str istr) ?\\) 570 (progn 571 (setq n-esc (1+ n-esc)) 572 2) 573 1)))) 574 (if (zerop n-esc) 575 ;; no escapes 576 str 577 ;; at least one escape 578 (let ((new (make-string (- len n-esc) ?\ )) 579 (inew 0)) 580 ;; eliminate all escapes 581 (setq istr 0) 582 (while (> n-esc 0) 583 (and (= (aref str istr) ?\\) 584 (setq istr (1+ istr) 585 n-esc (1- n-esc))) 586 (aset new inew (aref str istr)) 587 (setq inew (1+ inew) 588 istr (1+ istr))) 589 ;; remaining string has no escape 590 (while (< istr len) 591 (aset new inew (aref str istr)) 592 (setq inew (1+ inew) 593 istr (1+ istr))) 594 new)))) 595 596 597;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 598 599 600(provide 'ebnf-bnf) 601 602 603;;; arch-tag: 3b1834d3-8367-475b-80d5-8e0bbd00ce50 604;;; ebnf-bnf.el ends here 605