1;;; ebnf-abn.el --- parser for ABNF (Augmented BNF) 2 3;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 4;; Free Software Foundation, Inc. 5 6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br> 7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br> 8;; Keywords: wp, ebnf, PostScript 9;; Version: 1.1 10 11;; This file is part of GNU Emacs. 12 13;; GNU Emacs is free software; you can redistribute it and/or modify 14;; it under the terms of the GNU General Public License as published by 15;; the Free Software Foundation; either version 2, or (at your option) 16;; any later version. 17 18;; GNU Emacs is distributed in the hope that it will be useful, 19;; but WITHOUT ANY WARRANTY; without even the implied warranty of 20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21;; GNU General Public License for more details. 22 23;; You should have received a copy of the GNU General Public License 24;; along with GNU Emacs; see the file COPYING. If not, write to the 25;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 26;; Boston, MA 02110-1301, USA. 27 28;;; Commentary: 29 30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 31;; 32;; 33;; This is part of ebnf2ps package. 34;; 35;; This package defines a parser for ABNF (Augmented BNF). 36;; 37;; See ebnf2ps.el for documentation. 38;; 39;; 40;; ABNF Syntax 41;; ----------- 42;; 43;; See the URL: 44;; `http://www.ietf.org/rfc/rfc2234.txt' 45;; or 46;; `http://www.faqs.org/rfcs/rfc2234.html' 47;; or 48;; `http://www.rnp.br/ietf/rfc/rfc2234.txt' 49;; ("Augmented BNF for Syntax Specifications: ABNF"). 50;; 51;; 52;; rulelist = 1*( rule / (*c-wsp c-nl) ) 53;; 54;; rule = rulename defined-as elements c-nl 55;; ; continues if next line starts with white space 56;; 57;; rulename = ALPHA *(ALPHA / DIGIT / "-") 58;; 59;; defined-as = *c-wsp ("=" / "=/") *c-wsp 60;; ; basic rules definition and incremental 61;; ; alternatives 62;; 63;; elements = alternation *c-wsp 64;; 65;; c-wsp = WSP / (c-nl WSP) 66;; 67;; c-nl = comment / CRLF 68;; ; comment or newline 69;; 70;; comment = ";" *(WSP / VCHAR) CRLF 71;; 72;; alternation = concatenation 73;; *(*c-wsp "/" *c-wsp concatenation) 74;; 75;; concatenation = repetition *(1*c-wsp repetition) 76;; 77;; repetition = [repeat] element 78;; 79;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT) 80;; 81;; element = rulename / group / option / 82;; char-val / num-val / prose-val 83;; 84;; group = "(" *c-wsp alternation *c-wsp ")" 85;; 86;; option = "[" *c-wsp alternation *c-wsp "]" 87;; 88;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE 89;; ; quoted string of SP and VCHAR without DQUOTE 90;; 91;; num-val = "%" (bin-val / dec-val / hex-val) 92;; 93;; bin-val = "b" 1*BIT 94;; [ 1*("." 1*BIT) / ("-" 1*BIT) ] 95;; ; series of concatenated bit values 96;; ; or single ONEOF range 97;; 98;; dec-val = "d" 1*DIGIT 99;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] 100;; 101;; hex-val = "x" 1*HEXDIG 102;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] 103;; 104;; prose-val = "<" *(%x20-3D / %x3F-7E) ">" 105;; ; bracketed string of SP and VCHAR without 106;; ; angles 107;; ; prose description, to be used as last resort 108;; 109;; ; Core rules -- the coding depends on the system, here is used 7-bit ASCII 110;; 111;; ALPHA = %x41-5A / %x61-7A 112;; ; A-Z / a-z 113;; 114;; BIT = "0" / "1" 115;; 116;; CHAR = %x01-7F 117;; ; any 7-bit US-ASCII character, excluding NUL 118;; 119;; CR = %x0D 120;; ; carriage return 121;; 122;; CRLF = CR LF 123;; ; Internet standard newline 124;; 125;; CTL = %x00-1F / %x7F 126;; ; controls 127;; 128;; DIGIT = %x30-39 129;; ; 0-9 130;; 131;; DQUOTE = %x22 132;; ; " (Double Quote) 133;; 134;; HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" 135;; 136;; HTAB = %x09 137;; ; horizontal tab 138;; 139;; LF = %x0A 140;; ; linefeed 141;; 142;; LWSP = *(WSP / CRLF WSP) 143;; ; linear white space (past newline) 144;; 145;; OCTET = %x00-FF 146;; ; 8 bits of data 147;; 148;; SP = %x20 149;; ; space 150;; 151;; VCHAR = %x21-7E 152;; ; visible (printing) characters 153;; 154;; WSP = SP / HTAB 155;; ; white space 156;; 157;; 158;; NOTES: 159;; 160;; 1. Rules name and terminal strings are case INSENSITIVE. 161;; So, the following rule names are all equals: 162;; Rule-name, rule-Name, rule-name, RULE-NAME 163;; Also, the following strings are equals: 164;; "abc", "ABC", "aBc", "Abc", "aBC", etc. 165;; 166;; 2. To have a case SENSITIVE string, use the character notation. 167;; For example, to specify the lowercase string "abc", use: 168;; %d97.98.99 169;; 170;; 3. There are no implicit spaces between elements, for example, the 171;; following rules: 172;; 173;; foo = %x61 ; a 174;; 175;; bar = %x62 ; b 176;; 177;; mumble = foo bar foo 178;; 179;; Are equivalent to the following rule: 180;; 181;; mumble = %x61.62.61 182;; 183;; If spaces are needed, it should be explicit specified, like: 184;; 185;; spaces = 1*(%x20 / %x09) ; one or more spaces or tabs 186;; 187;; mumble = foo spaces bar spaces foo 188;; 189;; 4. Lines starting with space or tab are considered a continuation line. 190;; For example, the rule: 191;; 192;; rule = foo 193;; bar 194;; 195;; Is equivalent to: 196;; 197;; rule = foo bar 198;; 199;; 200;; Differences Between ABNF And ebnf2ps ABNF 201;; ----------------------------------------- 202;; 203;; Besides the characters that ABNF accepts, ebnf2ps ABNF accepts also the 204;; underscore (_) for rule name and european 8-bit accentuated characters (from 205;; \240 to \377) for rule name, string and comment. 206;; 207;; 208;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 209 210;;; Code: 211 212 213(require 'ebnf-otz) 214 215 216(defvar ebnf-abn-lex nil 217 "Value returned by `ebnf-abn-lex' function.") 218 219 220;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 221;; Syntactic analyzer 222 223 224;;; rulelist = 1*( rule / (*c-wsp c-nl) ) 225 226(defun ebnf-abn-parser (start) 227 "ABNF parser." 228 (let ((total (+ (- ebnf-limit start) 1)) 229 (bias (1- start)) 230 (origin (point)) 231 rule-list token rule) 232 (goto-char start) 233 (setq token (ebnf-abn-lex)) 234 (and (eq token 'end-of-input) 235 (error "Invalid ABNF file format")) 236 (and (eq token 'end-of-rule) 237 (setq token (ebnf-abn-lex))) 238 (while (not (eq token 'end-of-input)) 239 (ebnf-message-float 240 "Parsing...%s%%" 241 (/ (* (- (point) bias) 100.0) total)) 242 (setq token (ebnf-abn-rule token) 243 rule (cdr token) 244 token (car token)) 245 (or (ebnf-add-empty-rule-list rule) 246 (setq rule-list (cons rule rule-list)))) 247 (goto-char origin) 248 rule-list)) 249 250 251;;; rule = rulename defined-as elements c-nl 252;;; ; continues if next line starts with white space 253;;; 254;;; rulename = ALPHA *(ALPHA / DIGIT / "-") 255;;; 256;;; defined-as = *c-wsp ("=" / "=/") *c-wsp 257;;; ; basic rules definition and incremental 258;;; ; alternatives 259;;; 260;;; elements = alternation *c-wsp 261;;; 262;;; c-wsp = WSP / (c-nl WSP) 263;;; 264;;; c-nl = comment / CRLF 265;;; ; comment or newline 266;;; 267;;; comment = ";" *(WSP / VCHAR) CRLF 268 269 270(defun ebnf-abn-rule (token) 271 (let ((name ebnf-abn-lex) 272 (action ebnf-action) 273 elements) 274 (setq ebnf-action nil) 275 (or (eq token 'non-terminal) 276 (error "Invalid rule name")) 277 (setq token (ebnf-abn-lex)) 278 (or (memq token '(equal incremental-alternative)) 279 (error "Invalid rule: missing `=' or `=/'")) 280 (and (eq token 'incremental-alternative) 281 (setq name (concat name " =/"))) 282 (setq elements (ebnf-abn-alternation)) 283 (or (memq (car elements) '(end-of-rule end-of-input)) 284 (error "Invalid rule: there is no end of rule")) 285 (setq elements (cdr elements)) 286 (ebnf-eps-add-production name) 287 (cons (ebnf-abn-lex) 288 (ebnf-make-production name elements action)))) 289 290 291;;; alternation = concatenation 292;;; *(*c-wsp "/" *c-wsp concatenation) 293 294 295(defun ebnf-abn-alternation () 296 (let (body concatenation) 297 (while (eq (car (setq concatenation 298 (ebnf-abn-concatenation (ebnf-abn-lex)))) 299 'alternative) 300 (setq body (cons (cdr concatenation) body))) 301 (ebnf-token-alternative body concatenation))) 302 303 304;;; concatenation = repetition *(1*c-wsp repetition) 305 306 307(defun ebnf-abn-concatenation (token) 308 (let ((term (ebnf-abn-repetition token)) 309 seq) 310 (or (setq token (car term) 311 term (cdr term)) 312 (error "Empty element")) 313 (setq seq (cons term seq)) 314 (while (setq term (ebnf-abn-repetition token) 315 token (car term) 316 term (cdr term)) 317 (setq seq (cons term seq))) 318 (cons token 319 (ebnf-token-sequence seq)))) 320 321 322;;; repetition = [repeat] element 323;;; 324;;; repeat = 1*DIGIT / (*DIGIT "*" *DIGIT) 325 326 327(defun ebnf-abn-repetition (token) 328 (let (lower upper) 329 ;; INTEGER [ "*" [ INTEGER ] ] 330 (when (eq token 'integer) 331 (setq lower ebnf-abn-lex 332 token (ebnf-abn-lex)) 333 (or (eq token 'repeat) 334 (setq upper lower))) 335 ;; "*" [ INTEGER ] 336 (when (eq token 'repeat) 337 ;; only * ==> lower & upper are empty string 338 (or lower 339 (setq lower "" 340 upper "")) 341 (when (eq (setq token (ebnf-abn-lex)) 'integer) 342 (setq upper ebnf-abn-lex 343 token (ebnf-abn-lex)))) 344 (let ((element (ebnf-abn-element token))) 345 (cond 346 ;; there is a repetition 347 (lower 348 (or element 349 (error "Missing element repetition")) 350 (setq token (ebnf-abn-lex)) 351 (cond 352 ;; one or more 353 ((and (string= lower "1") (null upper)) 354 (cons token (ebnf-make-one-or-more element))) 355 ;; zero or more 356 ((or (and (string= lower "0") (null upper)) 357 (and (string= lower "") (string= upper ""))) 358 (cons token (ebnf-make-zero-or-more element))) 359 ;; real repetition 360 (t 361 (ebnf-token-repeat lower (cons token element) upper)))) 362 ;; there is an element 363 (element 364 (cons (ebnf-abn-lex) element)) 365 ;; something that caller has to deal 366 (t 367 (cons token nil)))))) 368 369 370;;; element = rulename / group / option / 371;;; char-val / num-val / prose-val 372;;; 373;;; group = "(" *c-wsp alternation *c-wsp ")" 374;;; 375;;; option = "[" *c-wsp alternation *c-wsp "]" 376;;; 377;;; char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE 378;;; ; quoted string of SP and VCHAR without DQUOTE 379;;; 380;;; num-val = "%" (bin-val / dec-val / hex-val) 381;;; 382;;; bin-val = "b" 1*BIT 383;;; [ 1*("." 1*BIT) / ("-" 1*BIT) ] 384;;; ; series of concatenated bit values 385;;; ; or single ONEOF range 386;;; 387;;; dec-val = "d" 1*DIGIT 388;;; [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] 389;;; 390;;; hex-val = "x" 1*HEXDIG 391;;; [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] 392;;; 393;;; prose-val = "<" *(%x20-3D / %x3F-7E) ">" 394;;; ; bracketed string of SP and VCHAR without 395;;; ; angles 396;;; ; prose description, to be used as last resort 397 398 399(defun ebnf-abn-element (token) 400 (cond 401 ;; terminal 402 ((eq token 'terminal) 403 (ebnf-make-terminal ebnf-abn-lex)) 404 ;; non-terminal 405 ((eq token 'non-terminal) 406 (ebnf-make-non-terminal ebnf-abn-lex)) 407 ;; group 408 ((eq token 'begin-group) 409 (let ((body (ebnf-abn-alternation))) 410 (or (eq (car body) 'end-group) 411 (error "Missing `)'")) 412 (cdr body))) 413 ;; optional 414 ((eq token 'begin-optional) 415 (let ((body (ebnf-abn-alternation))) 416 (or (eq (car body) 'end-optional) 417 (error "Missing `]'")) 418 (ebnf-token-optional (cdr body)))) 419 ;; no element 420 (t 421 nil) 422 )) 423 424 425;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 426;; Lexical analyzer 427 428 429(defconst ebnf-abn-token-table (make-vector 256 'error) 430 "Vector used to map characters to a lexical token.") 431 432 433(defun ebnf-abn-initialize () 434 "Initialize EBNF token table." 435 ;; control character & control 8-bit character are set to `error' 436 (let ((char ?\060)) 437 ;; digits: 0-9 438 (while (< char ?\072) 439 (aset ebnf-abn-token-table char 'integer) 440 (setq char (1+ char))) 441 ;; printable character: A-Z 442 (setq char ?\101) 443 (while (< char ?\133) 444 (aset ebnf-abn-token-table char 'non-terminal) 445 (setq char (1+ char))) 446 ;; printable character: a-z 447 (setq char ?\141) 448 (while (< char ?\173) 449 (aset ebnf-abn-token-table char 'non-terminal) 450 (setq char (1+ char))) 451 ;; European 8-bit accentuated characters: 452 (setq char ?\240) 453 (while (< char ?\400) 454 (aset ebnf-abn-token-table char 'non-terminal) 455 (setq char (1+ char))) 456 ;; Override end of line characters: 457 (aset ebnf-abn-token-table ?\n 'end-of-rule) ; [NL] linefeed 458 (aset ebnf-abn-token-table ?\r 'end-of-rule) ; [CR] carriage return 459 ;; Override space characters: 460 (aset ebnf-abn-token-table ?\013 'space) ; [VT] vertical tab 461 (aset ebnf-abn-token-table ?\t 'space) ; [HT] horizontal tab 462 (aset ebnf-abn-token-table ?\ 'space) ; [SP] space 463 ;; Override form feed character: 464 (aset ebnf-abn-token-table ?\f 'form-feed) ; [FF] form feed 465 ;; Override other lexical characters: 466 (aset ebnf-abn-token-table ?< 'non-terminal) 467 (aset ebnf-abn-token-table ?% 'terminal) 468 (aset ebnf-abn-token-table ?\" 'terminal) 469 (aset ebnf-abn-token-table ?\( 'begin-group) 470 (aset ebnf-abn-token-table ?\) 'end-group) 471 (aset ebnf-abn-token-table ?* 'repeat) 472 (aset ebnf-abn-token-table ?= 'equal) 473 (aset ebnf-abn-token-table ?\[ 'begin-optional) 474 (aset ebnf-abn-token-table ?\] 'end-optional) 475 (aset ebnf-abn-token-table ?/ 'alternative) 476 ;; Override comment character: 477 (aset ebnf-abn-token-table ?\; 'comment))) 478 479 480;; replace the range "\240-\377" (see `ebnf-range-regexp'). 481(defconst ebnf-abn-non-terminal-chars 482 (ebnf-range-regexp "-_0-9A-Za-z" ?\240 ?\377)) 483(defconst ebnf-abn-non-terminal-letter-chars 484 (ebnf-range-regexp "A-Za-z" ?\240 ?\377)) 485 486 487(defun ebnf-abn-lex () 488 "Lexical analyzer for ABNF. 489 490Return a lexical token. 491 492See documentation for variable `ebnf-abn-lex'." 493 (if (>= (point) ebnf-limit) 494 'end-of-input 495 (let (token) 496 ;; skip spaces and comments 497 (while (if (> (following-char) 255) 498 (progn 499 (setq token 'error) 500 nil) 501 (setq token (aref ebnf-abn-token-table (following-char))) 502 (cond 503 ((eq token 'space) 504 (skip-chars-forward " \013\t" ebnf-limit) 505 (< (point) ebnf-limit)) 506 ((eq token 'comment) 507 (ebnf-abn-skip-comment)) 508 ((eq token 'form-feed) 509 (forward-char) 510 (setq ebnf-action 'form-feed)) 511 ((eq token 'end-of-rule) 512 (ebnf-abn-skip-end-of-rule)) 513 (t nil) 514 ))) 515 (cond 516 ;; end of input 517 ((>= (point) ebnf-limit) 518 'end-of-input) 519 ;; error 520 ((eq token 'error) 521 (error "Invalid character")) 522 ;; end of rule 523 ((eq token 'end-of-rule) 524 'end-of-rule) 525 ;; integer 526 ((eq token 'integer) 527 (setq ebnf-abn-lex (ebnf-buffer-substring "0-9")) 528 'integer) 529 ;; terminal: "string" or %[bdx]NNN((.NNN)+|-NNN)? 530 ((eq token 'terminal) 531 (setq ebnf-abn-lex 532 (if (= (following-char) ?\") 533 (ebnf-abn-string) 534 (ebnf-abn-character))) 535 'terminal) 536 ;; non-terminal: NAME or <NAME> 537 ((eq token 'non-terminal) 538 (let ((prose-p (= (following-char) ?<))) 539 (when prose-p 540 (forward-char) 541 (or (looking-at ebnf-abn-non-terminal-letter-chars) 542 (error "Invalid prose value"))) 543 (setq ebnf-abn-lex 544 (ebnf-buffer-substring ebnf-abn-non-terminal-chars)) 545 (when prose-p 546 (or (= (following-char) ?>) 547 (error "Invalid prose value")) 548 (setq ebnf-abn-lex (concat "<" ebnf-abn-lex ">")))) 549 'non-terminal) 550 ;; equal: =, =/ 551 ((eq token 'equal) 552 (forward-char) 553 (if (/= (following-char) ?/) 554 'equal 555 (forward-char) 556 'incremental-alternative)) 557 ;; miscellaneous: (, ), [, ], /, * 558 (t 559 (forward-char) 560 token) 561 )))) 562 563 564(defun ebnf-abn-skip-end-of-rule () 565 (let (eor-p) 566 (while (progn 567 ;; end of rule ==> 2 or more consecutive end of lines 568 (setq eor-p (or (> (skip-chars-forward "\r\n" ebnf-limit) 1) 569 eor-p)) 570 ;; skip spaces 571 (skip-chars-forward " \013\t" ebnf-limit) 572 ;; skip comments 573 (and (= (following-char) ?\;) 574 (ebnf-abn-skip-comment)))) 575 (not eor-p))) 576 577 578;; replace the range "\177-\237" (see `ebnf-range-regexp'). 579(defconst ebnf-abn-comment-chars 580 (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237)) 581 582 583(defun ebnf-abn-skip-comment () 584 (forward-char) 585 (cond 586 ;; open EPS file 587 ((and ebnf-eps-executing (= (following-char) ?\[)) 588 (ebnf-eps-add-context (ebnf-abn-eps-filename))) 589 ;; close EPS file 590 ((and ebnf-eps-executing (= (following-char) ?\])) 591 (ebnf-eps-remove-context (ebnf-abn-eps-filename))) 592 ;; any other action in comment 593 (t 594 (setq ebnf-action (aref ebnf-comment-table (following-char))) 595 (skip-chars-forward ebnf-abn-comment-chars ebnf-limit)) 596 ) 597 ;; check for a valid end of comment 598 (cond ((>= (point) ebnf-limit) 599 nil) 600 ((= (following-char) ?\n) 601 t) 602 (t 603 (error "Invalid character")) 604 )) 605 606 607(defun ebnf-abn-eps-filename () 608 (forward-char) 609 (ebnf-buffer-substring ebnf-abn-comment-chars)) 610 611 612;; replace the range "\240-\377" (see `ebnf-range-regexp'). 613(defconst ebnf-abn-string-chars 614 (ebnf-range-regexp " -!#-~" ?\240 ?\377)) 615 616 617(defun ebnf-abn-string () 618 (buffer-substring-no-properties 619 (progn 620 (forward-char) 621 (point)) 622 (progn 623 (skip-chars-forward ebnf-abn-string-chars ebnf-limit) 624 (or (= (following-char) ?\") 625 (error "Missing `\"'")) 626 (prog1 627 (point) 628 (forward-char))))) 629 630 631(defun ebnf-abn-character () 632 ;; %[bdx]NNN((-NNN)|(.NNN)+)? 633 (buffer-substring-no-properties 634 (point) 635 (progn 636 (forward-char) 637 (let* ((char (following-char)) 638 (chars (cond ((or (= char ?B) (= char ?b)) "01") 639 ((or (= char ?D) (= char ?d)) "0-9") 640 ((or (= char ?X) (= char ?x)) "0-9A-Fa-f") 641 (t (error "Invalid terminal value"))))) 642 (forward-char) 643 (or (> (skip-chars-forward chars ebnf-limit) 0) 644 (error "Invalid terminal value")) 645 (if (= (following-char) ?-) 646 (progn 647 (forward-char) 648 (or (> (skip-chars-forward chars ebnf-limit) 0) 649 (error "Invalid terminal value range"))) 650 (while (= (following-char) ?.) 651 (forward-char) 652 (or (> (skip-chars-forward chars ebnf-limit) 0) 653 (error "Invalid terminal value"))))) 654 (point)))) 655 656 657;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 658 659 660(provide 'ebnf-abn) 661 662;;; arch-tag: 8d1b3c4d-4226-4393-b9ae-b7ccf07cf779 663;;; ebnf-abn.el ends here 664