1;;; ebnf-iso.el --- parser for ISO EBNF 2 3;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 4;; Free Software Foundation, Inc. 5 6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> 7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> 8;; Keywords: wp, ebnf, PostScript 9;; Version: 1.8 10 11;; This file is part of GNU Emacs. 12 13;; GNU Emacs is free software; you can redistribute it and/or modify 14;; it under the terms of the GNU General Public License as published by 15;; the Free Software Foundation; either version 2, or (at your option) 16;; any later version. 17 18;; GNU Emacs is distributed in the hope that it will be useful, 19;; but WITHOUT ANY WARRANTY; without even the implied warranty of 20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21;; GNU General Public License for more details. 22 23;; You should have received a copy of the GNU General Public License 24;; along with GNU Emacs; see the file COPYING. If not, write to the 25;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 26;; Boston, MA 02110-1301, USA. 27 28;;; Commentary: 29 30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 31;; 32;; 33;; This is part of ebnf2ps package. 34;; 35;; This package defines a parser for ISO EBNF. 36;; 37;; See ebnf2ps.el for documentation. 38;; 39;; 40;; ISO EBNF Syntax 41;; --------------- 42;; 43;; See the URL: 44;; `http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html' 45;; ("International Standard of the ISO EBNF Notation"). 46;; 47;; 48;; ISO EBNF = syntax rule, {syntax rule}; 49;; 50;; syntax rule = meta identifier, '=', definition list, ';'; 51;; 52;; definition list = single definition, {'|', single definition}; 53;; 54;; single definition = term, {',', term}; 55;; 56;; term = factor, ['-', exception]; 57;; 58;; exception = factor (* without <meta identifier> *); 59;; 60;; factor = [integer, '*'], primary; 61;; 62;; primary = optional sequence | repeated sequence | special sequence 63;; | grouped sequence | meta identifier | terminal string 64;; | empty; 65;; 66;; empty = ; 67;; 68;; optional sequence = '[', definition list, ']'; 69;; 70;; repeated sequence = '{', definition list, '}'; 71;; 72;; grouped sequence = '(', definition list, ')'; 73;; 74;; terminal string = "'", character - "'", {character - "'"}, "'" 75;; | '"', character - '"', {character - '"'}, '"'; 76;; 77;; special sequence = '?', {character - '?'}, '?'; 78;; 79;; meta identifier = letter, { letter | decimal digit | ' ' }; 80;; 81;; integer = decimal digit, {decimal digit}; 82;; 83;; comment = '(*', {comment symbol}, '*)'; 84;; 85;; comment symbol = comment (* <== NESTED COMMENT *) 86;; | terminal string | special sequence | character; 87;; 88;; letter = ? A-Z a-z ?; 89;; 90;; decimal digit = ? 0-9 ?; 91;; 92;; character = letter | decimal digit 93;; | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{' 94;; | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_' 95;; | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~'; 96;; 97;; 98;; There is also the following alternative representation: 99;; 100;; STANDARD ALTERNATIVE 101;; | ==> / or ! 102;; [ ==> (/ 103;; ] ==> /) 104;; { ==> (: 105;; } ==> :) 106;; ; ==> . 107;; 108;; 109;; Differences Between ISO EBNF And ebnf2ps ISO EBNF 110;; ------------------------------------------------- 111;; 112;; ISO EBNF accepts the characters given by <character> production above, 113;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED 114;; (^L), any other characters are invalid. But ebnf2ps accepts also the 115;; european 8-bit accentuated characters (from \240 to \377) and underscore 116;; (_). 117;; 118;; 119;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 120 121;;; Code: 122 123 124(require 'ebnf-otz) 125 126 127(defvar ebnf-iso-lex nil 128 "Value returned by `ebnf-iso-lex' function.") 129 130 131(defvar ebnf-no-meta-identifier nil 132 "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.") 133 134 135;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 136;; Syntactic analyzer 137 138 139;;; ISO EBNF = syntax rule, {syntax rule}; 140 141(defun ebnf-iso-parser (start) 142 "ISO EBNF parser." 143 (let ((total (+ (- ebnf-limit start) 1)) 144 (bias (1- start)) 145 (origin (point)) 146 syntax-list token rule) 147 (goto-char start) 148 (setq token (ebnf-iso-lex)) 149 (and (eq token 'end-of-input) 150 (error "Invalid ISO EBNF file format")) 151 (while (not (eq token 'end-of-input)) 152 (ebnf-message-float 153 "Parsing...%s%%" 154 (/ (* (- (point) bias) 100.0) total)) 155 (setq token (ebnf-iso-syntax-rule token) 156 rule (cdr token) 157 token (car token)) 158 (or (ebnf-add-empty-rule-list rule) 159 (setq syntax-list (cons rule syntax-list)))) 160 (goto-char origin) 161 syntax-list)) 162 163 164;;; syntax rule = meta identifier, '=', definition list, ';'; 165 166(defun ebnf-iso-syntax-rule (token) 167 (let ((header ebnf-iso-lex) 168 (action ebnf-action) 169 body) 170 (setq ebnf-action nil) 171 (or (eq token 'non-terminal) 172 (error "Invalid meta identifier syntax rule")) 173 (or (eq (ebnf-iso-lex) 'equal) 174 (error "Invalid syntax rule: missing `='")) 175 (setq body (ebnf-iso-definition-list)) 176 (or (eq (car body) 'period) 177 (error "Invalid syntax rule: missing `;' or `.'")) 178 (setq body (cdr body)) 179 (ebnf-eps-add-production header) 180 (cons (ebnf-iso-lex) 181 (ebnf-make-production header body action)))) 182 183 184;;; definition list = single definition, {'|', single definition}; 185 186(defun ebnf-iso-definition-list () 187 (let (body sequence) 188 (while (eq (car (setq sequence (ebnf-iso-single-definition))) 189 'alternative) 190 (setq sequence (cdr sequence) 191 body (cons sequence body))) 192 (ebnf-token-alternative body sequence))) 193 194 195;;; single definition = term, {',', term}; 196 197(defun ebnf-iso-single-definition () 198 (let (token seq term) 199 (while (and (setq term (ebnf-iso-term (ebnf-iso-lex)) 200 token (car term) 201 term (cdr term)) 202 (eq token 'catenate)) 203 (setq seq (cons term seq))) 204 (cons token 205 (ebnf-token-sequence (if term 206 (cons term seq) 207 seq))))) 208 209 210;;; term = factor, ['-', exception]; 211;;; 212;;; exception = factor (* without <meta identifier> *); 213 214(defun ebnf-iso-term (token) 215 (let ((factor (ebnf-iso-factor token))) 216 (if (not (eq (car factor) 'except)) 217 ;; factor 218 factor 219 ;; factor - exception 220 (let ((ebnf-no-meta-identifier t)) 221 (ebnf-token-except (cdr factor) (ebnf-iso-factor (ebnf-iso-lex))))))) 222 223 224;;; factor = [integer, '*'], primary; 225 226(defun ebnf-iso-factor (token) 227 (if (eq token 'integer) 228 (let ((times ebnf-iso-lex)) 229 (or (eq (ebnf-iso-lex) 'repeat) 230 (error "Missing `*'")) 231 (ebnf-token-repeat times (ebnf-iso-primary (ebnf-iso-lex)))) 232 (ebnf-iso-primary token))) 233 234 235;;; primary = optional sequence | repeated sequence | special sequence 236;;; | grouped sequence | meta identifier | terminal string 237;;; | empty; 238;;; 239;;; empty = ; 240;;; 241;;; optional sequence = '[', definition list, ']'; 242;;; 243;;; repeated sequence = '{', definition list, '}'; 244;;; 245;;; grouped sequence = '(', definition list, ')'; 246;;; 247;;; terminal string = "'", character - "'", {character - "'"}, "'" 248;;; | '"', character - '"', {character - '"'}, '"'; 249;;; 250;;; special sequence = '?', {character - '?'}, '?'; 251;;; 252;;; meta identifier = letter, {letter | decimal digit}; 253 254(defun ebnf-iso-primary (token) 255 (let ((primary 256 (cond 257 ;; terminal string 258 ((eq token 'terminal) 259 (ebnf-make-terminal ebnf-iso-lex)) 260 ;; meta identifier 261 ((eq token 'non-terminal) 262 (ebnf-make-non-terminal ebnf-iso-lex)) 263 ;; special sequence 264 ((eq token 'special) 265 (ebnf-make-special ebnf-iso-lex)) 266 ;; grouped sequence 267 ((eq token 'begin-group) 268 (let ((body (ebnf-iso-definition-list))) 269 (or (eq (car body) 'end-group) 270 (error "Missing `)'")) 271 (cdr body))) 272 ;; optional sequence 273 ((eq token 'begin-optional) 274 (let ((body (ebnf-iso-definition-list))) 275 (or (eq (car body) 'end-optional) 276 (error "Missing `]' or `/)'")) 277 (ebnf-token-optional (cdr body)))) 278 ;; repeated sequence 279 ((eq token 'begin-zero-or-more) 280 (let* ((body (ebnf-iso-definition-list)) 281 (repeat (cdr body))) 282 (or (eq (car body) 'end-zero-or-more) 283 (error "Missing `}' or `:)'")) 284 (ebnf-make-zero-or-more repeat))) 285 ;; empty 286 (t 287 nil) 288 ))) 289 (cons (if primary 290 (ebnf-iso-lex) 291 token) 292 primary))) 293 294 295;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 296;; Lexical analyzer 297 298 299(defconst ebnf-iso-token-table 300 ;; control character & 8-bit character are set to `error' 301 (let ((table (make-vector 256 'error)) 302 (char ?\040)) 303 ;; printable character 304 (while (< char ?\060) 305 (aset table char 'character) 306 (setq char (1+ char))) 307 ;; digits: 308 (while (< char ?\072) 309 (aset table char 'integer) 310 (setq char (1+ char))) 311 (while (< char ?\101) 312 (aset table char 'character) 313 (setq char (1+ char))) 314 ;; upper case letters: 315 (while (< char ?\133) 316 (aset table char 'non-terminal) 317 (setq char (1+ char))) 318 (while (< char ?\141) 319 (aset table char 'character) 320 (setq char (1+ char))) 321 ;; lower case letters: 322 (while (< char ?\173) 323 (aset table char 'non-terminal) 324 (setq char (1+ char))) 325 (while (< char ?\177) 326 (aset table char 'character) 327 (setq char (1+ char))) 328 ;; European 8-bit accentuated characters: 329 (setq char ?\240) 330 (while (< char ?\400) 331 (aset table char 'non-terminal) 332 (setq char (1+ char))) 333 ;; Override space characters: 334 (aset table ?\013 'space) ; [VT] vertical tab 335 (aset table ?\n 'space) ; [NL] linefeed 336 (aset table ?\r 'space) ; [CR] carriage return 337 (aset table ?\t 'space) ; [HT] horizontal tab 338 (aset table ?\ 'space) ; [SP] space 339 ;; Override form feed character: 340 (aset table ?\f 'form-feed) ; [FF] form feed 341 ;; Override other lexical characters: 342 (aset table ?_ 'non-terminal) 343 (aset table ?\" 'double-terminal) 344 (aset table ?\' 'single-terminal) 345 (aset table ?\? 'special) 346 (aset table ?* 'repeat) 347 (aset table ?, 'catenate) 348 (aset table ?- 'except) 349 (aset table ?= 'equal) 350 (aset table ?\) 'end-group) 351 table) 352 "Vector used to map characters to a lexical token.") 353 354 355(defun ebnf-iso-initialize () 356 "Initialize ISO EBNF token table." 357 (if ebnf-iso-alternative-p 358 ;; Override alternative lexical characters: 359 (progn 360 (aset ebnf-iso-token-table ?\( 'left-parenthesis) 361 (aset ebnf-iso-token-table ?\[ 'character) 362 (aset ebnf-iso-token-table ?\] 'character) 363 (aset ebnf-iso-token-table ?\{ 'character) 364 (aset ebnf-iso-token-table ?\} 'character) 365 (aset ebnf-iso-token-table ?| 'character) 366 (aset ebnf-iso-token-table ?\; 'character) 367 (aset ebnf-iso-token-table ?/ 'slash) 368 (aset ebnf-iso-token-table ?! 'alternative) 369 (aset ebnf-iso-token-table ?: 'colon) 370 (aset ebnf-iso-token-table ?. 'period)) 371 ;; Override standard lexical characters: 372 (aset ebnf-iso-token-table ?\( 'begin-parenthesis) 373 (aset ebnf-iso-token-table ?\[ 'begin-optional) 374 (aset ebnf-iso-token-table ?\] 'end-optional) 375 (aset ebnf-iso-token-table ?\{ 'begin-zero-or-more) 376 (aset ebnf-iso-token-table ?\} 'end-zero-or-more) 377 (aset ebnf-iso-token-table ?| 'alternative) 378 (aset ebnf-iso-token-table ?\; 'period) 379 (aset ebnf-iso-token-table ?/ 'character) 380 (aset ebnf-iso-token-table ?! 'character) 381 (aset ebnf-iso-token-table ?: 'character) 382 (aset ebnf-iso-token-table ?. 'character))) 383 384 385;; replace the range "\240-\377" (see `ebnf-range-regexp'). 386(defconst ebnf-iso-non-terminal-chars 387 (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377)) 388 389 390(defun ebnf-iso-lex () 391 "Lexical analyzer for ISO EBNF. 392 393Return a lexical token. 394 395See documentation for variable `ebnf-iso-lex'." 396 (if (>= (point) ebnf-limit) 397 'end-of-input 398 (let (token) 399 ;; skip spaces and comments 400 (while (if (> (following-char) 255) 401 (progn 402 (setq token 'error) 403 nil) 404 (setq token (aref ebnf-iso-token-table (following-char))) 405 (cond 406 ((eq token 'space) 407 (skip-chars-forward " \013\n\r\t" ebnf-limit) 408 (< (point) ebnf-limit)) 409 ((or (eq token 'begin-parenthesis) 410 (eq token 'left-parenthesis)) 411 (forward-char) 412 (if (/= (following-char) ?*) 413 ;; no comment 414 nil 415 ;; comment 416 (ebnf-iso-skip-comment) 417 t)) 418 ((eq token 'form-feed) 419 (forward-char) 420 (setq ebnf-action 'form-feed)) 421 (t nil) 422 ))) 423 (cond 424 ;; end of input 425 ((>= (point) ebnf-limit) 426 'end-of-input) 427 ;; error 428 ((eq token 'error) 429 (error "Invalid character")) 430 ;; integer 431 ((eq token 'integer) 432 (setq ebnf-iso-lex (ebnf-buffer-substring "0-9")) 433 'integer) 434 ;; special: ?special? 435 ((eq token 'special) 436 (setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?") 437 (ebnf-string " ->@-~" ?\? "special") 438 (and ebnf-special-show-delimiter "?"))) 439 'special) 440 ;; terminal: "string" 441 ((eq token 'double-terminal) 442 (setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal")) 443 'terminal) 444 ;; terminal: 'string' 445 ((eq token 'single-terminal) 446 (setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal")) 447 'terminal) 448 ;; non-terminal 449 ((eq token 'non-terminal) 450 (setq ebnf-iso-lex 451 (ebnf-iso-normalize 452 (ebnf-trim-right 453 (ebnf-buffer-substring ebnf-iso-non-terminal-chars)))) 454 (and ebnf-no-meta-identifier 455 (error "Exception sequence should not contain a meta identifier")) 456 'non-terminal) 457 ;; begin optional, begin list or begin group 458 ((eq token 'left-parenthesis) 459 (forward-char) 460 (cond ((= (following-char) ?/) 461 (forward-char) 462 'begin-optional) 463 ((= (following-char) ?:) 464 (forward-char) 465 'begin-zero-or-more) 466 (t 467 'begin-group) 468 )) 469 ;; end optional or alternative 470 ((eq token 'slash) 471 (forward-char) 472 (if (/= (following-char) ?\)) 473 'alternative 474 (forward-char) 475 'end-optional)) 476 ;; end list 477 ((eq token 'colon) 478 (forward-char) 479 (if (/= (following-char) ?\)) 480 'character 481 (forward-char) 482 'end-zero-or-more)) 483 ;; begin group 484 ((eq token 'begin-parenthesis) 485 'begin-group) 486 ;; miscellaneous 487 (t 488 (forward-char) 489 token) 490 )))) 491 492 493;; replace the range "\177-\237" (see `ebnf-range-regexp'). 494(defconst ebnf-iso-comment-chars 495 (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237)) 496 497 498(defun ebnf-iso-skip-comment () 499 (forward-char) 500 (cond 501 ;; open EPS file 502 ((and ebnf-eps-executing (= (following-char) ?\[)) 503 (ebnf-eps-add-context (ebnf-iso-eps-filename))) 504 ;; close EPS file 505 ((and ebnf-eps-executing (= (following-char) ?\])) 506 (ebnf-eps-remove-context (ebnf-iso-eps-filename))) 507 ;; any other action in comment 508 (t 509 (setq ebnf-action (aref ebnf-comment-table (following-char)))) 510 ) 511 (let ((pair 1)) 512 (while (> pair 0) 513 (skip-chars-forward ebnf-iso-comment-chars ebnf-limit) 514 (cond ((>= (point) ebnf-limit) 515 (error "Missing end of comment: `*)'")) 516 ((= (following-char) ?*) 517 (skip-chars-forward "*" ebnf-limit) 518 (when (= (following-char) ?\)) 519 ;; end of comment 520 (forward-char) 521 (setq pair (1- pair)))) 522 ((= (following-char) ?\() 523 (skip-chars-forward "(" ebnf-limit) 524 (when (= (following-char) ?*) 525 ;; beginning of comment 526 (forward-char) 527 (setq pair (1+ pair)))) 528 (t 529 (error "Invalid character")) 530 )))) 531 532 533(defun ebnf-iso-eps-filename () 534 (forward-char) 535 (buffer-substring-no-properties 536 (point) 537 (let ((chars (concat ebnf-iso-comment-chars "\n")) 538 found) 539 (while (not found) 540 (skip-chars-forward chars ebnf-limit) 541 (setq found 542 (cond ((>= (point) ebnf-limit) 543 (point)) 544 ((= (following-char) ?*) 545 (skip-chars-forward "*" ebnf-limit) 546 (if (/= (following-char) ?\)) 547 nil 548 (backward-char) 549 (point))) 550 ((= (following-char) ?\() 551 (forward-char) 552 (if (/= (following-char) ?*) 553 nil 554 (backward-char) 555 (point))) 556 (t 557 (point)) 558 ))) 559 found))) 560 561 562(defun ebnf-iso-normalize (str) 563 (if (not ebnf-iso-normalize-p) 564 str 565 (let ((len (length str)) 566 (stri 0) 567 (spaces 0)) 568 ;; count exceeding spaces 569 (while (< stri len) 570 (if (/= (aref str stri) ?\ ) 571 (setq stri (1+ stri)) 572 (setq stri (1+ stri)) 573 (while (and (< stri len) (= (aref str stri) ?\ )) 574 (setq stri (1+ stri) 575 spaces (1+ spaces))))) 576 (if (zerop spaces) 577 ;; no exceeding space 578 str 579 ;; at least one exceeding space 580 (let ((new (make-string (- len spaces) ?\ )) 581 (newi 0)) 582 ;; eliminate exceeding spaces 583 (setq stri 0) 584 (while (> spaces 0) 585 (if (/= (aref str stri) ?\ ) 586 (progn 587 (aset new newi (aref str stri)) 588 (setq stri (1+ stri) 589 newi (1+ newi))) 590 (aset new newi (aref str stri)) 591 (setq stri (1+ stri) 592 newi (1+ newi)) 593 (while (and (> spaces 0) (= (aref str stri) ?\ )) 594 (setq stri (1+ stri) 595 spaces (1- spaces))))) 596 ;; remaining is normalized 597 (while (< stri len) 598 (aset new newi (aref str stri)) 599 (setq stri (1+ stri) 600 newi (1+ newi))) 601 new))))) 602 603 604;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 605 606 607(provide 'ebnf-iso) 608 609 610;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3 611;;; ebnf-iso.el ends here 612