1;;; ebnf-bnf.el --- parser for EBNF
2
3;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
4;;   Free Software Foundation, Inc.
5
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8;; Keywords: wp, ebnf, PostScript
9;; Version: 1.9
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING.  If not, write to the
25;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26;; Boston, MA 02110-1301, USA.
27
28;;; Commentary:
29
30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31;;
32;;
33;; This is part of ebnf2ps package.
34;;
35;; This package defines a parser for EBNF.
36;;
37;; See ebnf2ps.el for documentation.
38;;
39;;
40;; EBNF Syntax
41;; -----------
42;;
43;; The current EBNF that ebnf2ps accepts has the following constructions:
44;;
45;;    ;			comment (until end of line)
46;;    A			non-terminal
47;;    "C"		terminal
48;;    ?C?		special
49;;    $A		default non-terminal
50;;    $"C"		default terminal
51;;    $?C?		default special
52;;    A = B.		production (A is the header and B the body)
53;;    C D		sequence (C occurs before D)
54;;    C | D		alternative (C or D occurs)
55;;    A - B		exception (A excluding B, B without any non-terminal)
56;;    n * A		repetition (A repeats at least n (integer) times)
57;;    n * n A		repetition (A repeats exactly n (integer) times)
58;;    n * m A		repetition (A repeats at least n (integer) and at most
59;;			m (integer) times)
60;;    (C)		group (expression C is grouped together)
61;;    [C]		optional (C may or not occurs)
62;;    C+		one or more occurrences of C
63;;    {C}+		one or more occurrences of C
64;;    {C}*		zero or more occurrences of C
65;;    {C}		zero or more occurrences of C
66;;    C / D		equivalent to: C {D C}*
67;;    {C || D}+		equivalent to: C {D C}*
68;;    {C || D}*		equivalent to: [C {D C}*]
69;;    {C || D}		equivalent to: [C {D C}*]
70;;
71;; The EBNF syntax written using the notation above is:
72;;
73;;    EBNF = {production}+.
74;;
75;;    production = non_terminal "=" body ".".   ;; production
76;;
77;;    body = {sequence || "|"}*.                ;; alternative
78;;
79;;    sequence = {exception}*.                  ;; sequence
80;;
81;;    exception = repeat [ "-" repeat].         ;; exception
82;;
83;;    repeat = [ integer "*" [ integer ]] term. ;; repetition
84;;
85;;    term = factor
86;;         | [factor] "+"                       ;; one-or-more
87;;         | [factor] "/" [factor]              ;; one-or-more
88;;         .
89;;
90;;    factor = [ "$" ] "\"" terminal "\""       ;; terminal
91;;           | [ "$" ] non_terminal             ;; non-terminal
92;;           | [ "$" ] "?" special "?"          ;; special
93;;           | "(" body ")"                     ;; group
94;;           | "[" body "]"                     ;; zero-or-one
95;;           | "{" body [ "||" body ] "}+"      ;; one-or-more
96;;           | "{" body [ "||" body ] "}*"      ;; zero-or-more
97;;           | "{" body [ "||" body ] "}"       ;; zero-or-more
98;;           .
99;;
100;;    non_terminal = "[!#%&'*-,0-:<>@-Z\\\\^-z~\\240-\\377]+".
101;;    ;; that is, a valid non_terminal accepts decimal digits, letters (upper
102;;    ;; and lower), 8-bit accentuated characters,
103;;    ;; "!", "#", "%", "&", "'", "*", "+", ",", ":",
104;;    ;; "<", ">", "@", "\", "^", "_", "`" and "~".
105;;
106;;    terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
107;;    ;; that is, a valid terminal accepts any printable character (including
108;;    ;; 8-bit accentuated characters) except `"', as `"' is used to delimit a
109;;    ;; terminal.  Also, accepts escaped characters, that is, a character
110;;    ;; pair starting with `\' followed by a printable character, for
111;;    ;; example: \", \\.
112;;
113;;    special = "[^?\\000-\\010\\012-\\037\\177-\\237]*".
114;;    ;; that is, a valid special accepts any printable character (including
115;;    ;; 8-bit accentuated characters) and tabs except `?', as `?' is used to
116;;    ;; delimit a special.
117;;
118;;    integer = "[0-9]+".
119;;    ;; that is, an integer is a sequence of one or more decimal digits.
120;;
121;;    comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
122;;    ;; that is, a comment starts with the character `;' and terminates at end
123;;    ;; of line.  Also, it only accepts printable characters (including 8-bit
124;;    ;; accentuated characters) and tabs.
125;;
126;;
127;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
128
129;;; Code:
130
131
132(require 'ebnf-otz)
133
134
135(defvar ebnf-bnf-lex nil
136  "Value returned by `ebnf-bnf-lex' function.")
137
138
139;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
140;; Syntactic analyzer
141
142
143;;; EBNF = {production}+.
144
145(defun ebnf-bnf-parser (start)
146  "EBNF parser."
147  (let ((total (+ (- ebnf-limit start) 1))
148	(bias (1- start))
149	(origin (point))
150	prod-list token rule)
151    (goto-char start)
152    (setq token (ebnf-bnf-lex))
153    (and (eq token 'end-of-input)
154	 (error "Invalid EBNF file format"))
155    (while (not (eq token 'end-of-input))
156      (ebnf-message-float
157       "Parsing...%s%%"
158       (/ (* (- (point) bias) 100.0) total))
159      (setq token (ebnf-production token)
160	    rule  (cdr token)
161	    token (car token))
162      (or (ebnf-add-empty-rule-list rule)
163	  (setq prod-list (cons rule prod-list))))
164    (goto-char origin)
165    prod-list))
166
167
168;;; production = non-terminal "=" body ".".
169
170(defun ebnf-production (token)
171  (let ((header ebnf-bnf-lex)
172	(action ebnf-action)
173	body)
174    (setq ebnf-action nil)
175    (or (eq token 'non-terminal)
176	(error "Invalid header production"))
177    (or (eq (ebnf-bnf-lex) 'equal)
178	(error "Invalid production: missing `='"))
179    (setq body (ebnf-body))
180    (or (eq (car body) 'period)
181	(error "Invalid production: missing `.'"))
182    (setq body (cdr body))
183    (ebnf-eps-add-production header)
184    (cons (ebnf-bnf-lex)
185	  (ebnf-make-production header body action))))
186
187
188;;; body = {sequence || "|"}*.
189
190(defun ebnf-body ()
191  (let (body sequence)
192    (while (eq (car (setq sequence (ebnf-sequence))) 'alternative)
193      (setq sequence (cdr sequence)
194	    body     (cons sequence body)))
195    (ebnf-token-alternative body sequence)))
196
197
198;;; sequence = {exception}*.
199
200(defun ebnf-sequence ()
201  (let ((token (ebnf-bnf-lex))
202	seq term)
203    (while (setq term  (ebnf-exception token)
204		 token (car term)
205		 term  (cdr term))
206      (setq seq (cons term seq)))
207    (cons token
208	  (ebnf-token-sequence seq))))
209
210
211;;; exception = repeat [ "-" repeat].
212
213(defun ebnf-exception (token)
214  (let ((term (ebnf-repeat token)))
215    (if (not (eq (car term) 'except))
216	;; repeat
217	term
218      ;; repeat - repeat
219      (let ((exception (ebnf-repeat (ebnf-bnf-lex))))
220	(ebnf-no-non-terminal (cdr exception))
221	(ebnf-token-except (cdr term) exception)))))
222
223
224(defun ebnf-no-non-terminal (node)
225  (and (vectorp node)
226       (let ((kind (ebnf-node-kind node)))
227	 (cond
228	  ((eq kind 'ebnf-generate-non-terminal)
229	   (error "Exception sequence should not contain a non-terminal"))
230	  ((eq kind 'ebnf-generate-repeat)
231	   (ebnf-no-non-terminal (ebnf-node-separator node)))
232	  ((memq kind '(ebnf-generate-optional ebnf-generate-except))
233	   (ebnf-no-non-terminal (ebnf-node-list node)))
234	  ((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more))
235	   (ebnf-no-non-terminal (ebnf-node-list node))
236	   (ebnf-no-non-terminal (ebnf-node-separator node)))
237	  ((memq kind '(ebnf-generate-alternative ebnf-generate-sequence))
238	   (let ((seq (ebnf-node-list node)))
239	     (while seq
240	       (ebnf-no-non-terminal (car seq))
241	       (setq seq (cdr seq)))))
242	  ))))
243
244
245;;; repeat = [ integer "*" [ integer ]] term.
246
247(defun ebnf-repeat (token)
248  (if (not (eq token 'integer))
249      (ebnf-term token)
250    (let ((times ebnf-bnf-lex)
251	  upper)
252      (or (eq (ebnf-bnf-lex) 'repeat)
253	  (error "Missing `*'"))
254      (setq token (ebnf-bnf-lex))
255      (when (eq token 'integer)
256	(setq upper ebnf-bnf-lex
257	      token (ebnf-bnf-lex)))
258      (ebnf-token-repeat times (ebnf-term token) upper))))
259
260
261;;; term = factor
262;;;      | [factor] "+"                       ;; one-or-more
263;;;      | [factor] "/" [factor]              ;; one-or-more
264;;;      .
265
266(defun ebnf-term (token)
267  (let ((factor (ebnf-factor token)))
268    (and factor
269	 (setq token (ebnf-bnf-lex)))
270    (cond
271     ;; [factor] +
272     ((eq token 'one-or-more)
273      (cons (ebnf-bnf-lex)
274	    (and factor
275		 (let ((kind (ebnf-node-kind factor)))
276		   (cond
277		    ;; { A }+ + ==> { A }+
278		    ;; { A }* + ==> { A }*
279		    ((memq kind '(ebnf-generate-zero-or-more
280				  ebnf-generate-one-or-more))
281		     factor)
282		    ;; [ A ] + ==> { A }*
283		    ((eq kind 'ebnf-generate-optional)
284		     (ebnf-make-zero-or-more (list factor)))
285		    ;; A +
286		    (t
287		     (ebnf-make-one-or-more (list factor)))
288		    )))))
289     ;; [factor] / [factor]
290     ((eq token 'list)
291      (setq token (ebnf-bnf-lex))
292      (let ((sep (ebnf-factor token)))
293	(and sep
294	     (setq factor (or factor (ebnf-make-empty))))
295	(cons (if sep
296		  (ebnf-bnf-lex)
297		token)
298	      (and factor
299		   (ebnf-make-one-or-more factor sep)))))
300     ;; factor
301     (t
302      (cons token factor))
303     )))
304
305
306;;; factor = [ "$" ] "\"" terminal "\""         ;; terminal
307;;;        | [ "$" ] non_terminal               ;; non-terminal
308;;;        | [ "$" ] "?" special "?"            ;; special
309;;;        | "(" body ")"                       ;; group
310;;;        | "[" body "]"                       ;; zero-or-one
311;;;        | "{" body [ "||" body ] "}+"        ;; one-or-more
312;;;        | "{" body [ "||" body ] "}*"        ;; zero-or-more
313;;;        | "{" body [ "||" body ] "}"         ;; zero-or-more
314;;;        .
315
316(defun ebnf-factor (token)
317  (cond
318   ;; terminal
319   ((eq token 'terminal)
320    (ebnf-make-terminal ebnf-bnf-lex))
321   ;; non-terminal
322   ((eq token 'non-terminal)
323    (ebnf-make-non-terminal ebnf-bnf-lex))
324   ;; special
325   ((eq token 'special)
326    (ebnf-make-special ebnf-bnf-lex))
327   ;; group
328   ((eq token 'begin-group)
329    (let ((body (ebnf-body)))
330      (or (eq (car body) 'end-group)
331	  (error "Missing `)'"))
332      (cdr body)))
333   ;; optional
334   ((eq token 'begin-optional)
335    (let ((body (ebnf-body)))
336      (or (eq (car body) 'end-optional)
337	  (error "Missing `]'"))
338      (ebnf-token-optional (cdr body))))
339   ;; list
340   ((eq token 'begin-list)
341    (let* ((body      (ebnf-body))
342	   (token     (car body))
343	   (list-part (cdr body))
344	   sep-part)
345      (and (eq token 'list-separator)
346	   ;; { A || B }
347	   (setq body     (ebnf-body)	; get separator
348		 token    (car body)
349		 sep-part (cdr body)))
350      (cond
351       ;; { A }+
352       ((eq token 'end-one-or-more)
353	(ebnf-make-one-or-more list-part sep-part))
354       ;; { A }*
355       ((eq token 'end-zero-or-more)
356	(ebnf-make-zero-or-more list-part sep-part))
357       (t
358	(error "Missing `}+', `}*' or `}'"))
359       )))
360   ;; no term
361   (t
362    nil)
363   ))
364
365
366;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
367;; Lexical analyzer
368
369
370(defconst ebnf-bnf-token-table (make-vector 256 'error)
371  "Vector used to map characters to a lexical token.")
372
373
374(defun ebnf-bnf-initialize ()
375  "Initialize EBNF token table."
376  ;; control character & control 8-bit character are set to `error'
377  (let ((char ?\040))
378    ;; printable character:
379    (while (< char ?\060)
380      (aset ebnf-bnf-token-table char 'non-terminal)
381      (setq char (1+ char)))
382    ;; digits:
383    (while (< char ?\072)
384      (aset ebnf-bnf-token-table char 'integer)
385      (setq char (1+ char)))
386    ;; printable character:
387    (while (< char ?\177)
388      (aset ebnf-bnf-token-table char 'non-terminal)
389      (setq char (1+ char)))
390    ;; European 8-bit accentuated characters:
391    (setq char ?\240)
392    (while (< char ?\400)
393      (aset ebnf-bnf-token-table char 'non-terminal)
394      (setq char (1+ char)))
395    ;; Override space characters:
396    (aset ebnf-bnf-token-table ?\013 'space) ; [VT] vertical tab
397    (aset ebnf-bnf-token-table ?\n   'space) ; [NL] linefeed
398    (aset ebnf-bnf-token-table ?\r   'space) ; [CR] carriage return
399    (aset ebnf-bnf-token-table ?\t   'space) ; [HT] horizontal tab
400    (aset ebnf-bnf-token-table ?\    'space) ; [SP] space
401    ;; Override form feed character:
402    (aset ebnf-bnf-token-table ?\f 'form-feed) ; [FF] form feed
403    ;; Override other lexical characters:
404    (aset ebnf-bnf-token-table ?\" 'terminal)
405    (aset ebnf-bnf-token-table ?\? 'special)
406    (aset ebnf-bnf-token-table ?\( 'begin-group)
407    (aset ebnf-bnf-token-table ?\) 'end-group)
408    (aset ebnf-bnf-token-table ?*  'repeat)
409    (aset ebnf-bnf-token-table ?-  'except)
410    (aset ebnf-bnf-token-table ?=  'equal)
411    (aset ebnf-bnf-token-table ?\[ 'begin-optional)
412    (aset ebnf-bnf-token-table ?\] 'end-optional)
413    (aset ebnf-bnf-token-table ?\{ 'begin-list)
414    (aset ebnf-bnf-token-table ?|  'alternative)
415    (aset ebnf-bnf-token-table ?\} 'end-list)
416    (aset ebnf-bnf-token-table ?/  'list)
417    (aset ebnf-bnf-token-table ?+  'one-or-more)
418    (aset ebnf-bnf-token-table ?$  'default)
419    ;; Override comment character:
420    (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment)
421    ;; Override end of production character:
422    (aset ebnf-bnf-token-table ebnf-lex-eop-char     'period)))
423
424
425;; replace the range "\240-\377" (see `ebnf-range-regexp').
426(defconst ebnf-bnf-non-terminal-chars
427  (ebnf-range-regexp "!#%&'*-,0-:<>@-Z\\\\^-z~" ?\240 ?\377))
428
429
430(defun ebnf-bnf-lex ()
431  "Lexical analyzer for EBNF.
432
433Return a lexical token.
434
435See documentation for variable `ebnf-bnf-lex'."
436  (if (>= (point) ebnf-limit)
437      'end-of-input
438    (let (token)
439      ;; skip spaces and comments
440      (while (if (> (following-char) 255)
441		 (progn
442		   (setq token 'error)
443		   nil)
444	       (setq token (aref ebnf-bnf-token-table (following-char)))
445	       (cond
446		((eq token 'space)
447		 (skip-chars-forward " \013\n\r\t" ebnf-limit)
448		 (< (point) ebnf-limit))
449		((eq token 'comment)
450		 (ebnf-bnf-skip-comment))
451		((eq token 'form-feed)
452		 (forward-char)
453		 (setq ebnf-action 'form-feed))
454		(t nil)
455		)))
456      (setq ebnf-default-p nil)
457      (cond
458       ;; end of input
459       ((>= (point) ebnf-limit)
460	'end-of-input)
461       ;; error
462       ((eq token 'error)
463	(error "Invalid character"))
464       ;; default
465       ((eq token 'default)
466	(forward-char)
467	(if (memq (aref ebnf-bnf-token-table (following-char))
468		  '(terminal non-terminal special))
469	    (prog1
470		(ebnf-bnf-lex)
471	      (setq ebnf-default-p t))
472	  (error "Invalid `default' element")))
473       ;; integer
474       ((eq token 'integer)
475	(setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
476	'integer)
477       ;; special: ?special?
478       ((eq token 'special)
479	(setq ebnf-bnf-lex (concat (and ebnf-special-show-delimiter "?")
480				   (ebnf-string " ->@-~" ?\? "special")
481				   (and ebnf-special-show-delimiter "?")))
482	'special)
483       ;; terminal: "string"
484       ((eq token 'terminal)
485	(setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
486	'terminal)
487       ;; non-terminal or terminal
488       ((eq token 'non-terminal)
489	(setq ebnf-bnf-lex (ebnf-buffer-substring ebnf-bnf-non-terminal-chars))
490	(let ((case-fold-search ebnf-case-fold-search)
491	      match)
492	  (if (and ebnf-terminal-regexp
493		   (setq match (string-match ebnf-terminal-regexp
494					     ebnf-bnf-lex))
495		   (zerop match)
496		   (= (match-end 0) (length ebnf-bnf-lex)))
497	      'terminal
498	    'non-terminal)))
499       ;; end of list: }+, }*, }
500       ((eq token 'end-list)
501	(forward-char)
502	(cond
503	 ((= (following-char) ?+)
504	  (forward-char)
505	  'end-one-or-more)
506	 ((= (following-char) ?*)
507	  (forward-char)
508	  'end-zero-or-more)
509	 (t
510	  'end-zero-or-more)
511	 ))
512       ;; alternative: |, ||
513       ((eq token 'alternative)
514	(forward-char)
515	(if (/= (following-char) ?|)
516	    'alternative
517	  (forward-char)
518	  'list-separator))
519       ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
520       (t
521	(forward-char)
522	token)
523       ))))
524
525
526;; replace the range "\177-\237" (see `ebnf-range-regexp').
527(defconst ebnf-bnf-comment-chars
528  (ebnf-range-regexp "^\n\000-\010\016-\037" ?\177 ?\237))
529
530
531(defun ebnf-bnf-skip-comment ()
532  (forward-char)
533  (cond
534   ;; open EPS file
535   ((and ebnf-eps-executing (= (following-char) ?\[))
536    (ebnf-eps-add-context (ebnf-bnf-eps-filename)))
537   ;; close EPS file
538   ((and ebnf-eps-executing (= (following-char) ?\]))
539    (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
540   ;; any other action in comment
541   (t
542    (setq ebnf-action (aref ebnf-comment-table (following-char)))
543    (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit))
544   )
545  ;; check for a valid end of comment
546  (cond ((>= (point) ebnf-limit)
547	 nil)
548	((= (following-char) ?\n)
549	 (forward-char)
550	 t)
551	(t
552	 (error "Invalid character"))
553	))
554
555
556(defun ebnf-bnf-eps-filename ()
557  (forward-char)
558  (ebnf-buffer-substring ebnf-bnf-comment-chars))
559
560
561(defun ebnf-unescape-string (str)
562  (let* ((len (length str))
563	 (size (1- len))
564	 (istr 0)
565	 (n-esc 0))
566    ;; count number of escapes
567    (while (< istr size)
568      (setq istr (+ istr
569		    (if (= (aref str istr) ?\\)
570			(progn
571			  (setq n-esc (1+ n-esc))
572			  2)
573		      1))))
574    (if (zerop n-esc)
575	;; no escapes
576	str
577      ;; at least one escape
578      (let ((new (make-string (- len n-esc) ?\ ))
579	    (inew 0))
580	;; eliminate all escapes
581	(setq istr 0)
582	(while (> n-esc 0)
583	  (and (= (aref str istr) ?\\)
584	       (setq istr  (1+ istr)
585		     n-esc (1- n-esc)))
586	  (aset new inew (aref str istr))
587	  (setq inew (1+ inew)
588		istr (1+ istr)))
589	;; remaining string has no escape
590	(while (< istr len)
591	  (aset new inew (aref str istr))
592	  (setq inew (1+ inew)
593		istr (1+ istr)))
594	new))))
595
596
597;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
598
599
600(provide 'ebnf-bnf)
601
602
603;;; arch-tag: 3b1834d3-8367-475b-80d5-8e0bbd00ce50
604;;; ebnf-bnf.el ends here
605