1;;; ebnf-iso.el --- parser for ISO EBNF
2
3;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
4;; Free Software Foundation, Inc.
5
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8;; Keywords: wp, ebnf, PostScript
9;; Version: 1.8
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING.  If not, write to the
25;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26;; Boston, MA 02110-1301, USA.
27
28;;; Commentary:
29
30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31;;
32;;
33;; This is part of ebnf2ps package.
34;;
35;; This package defines a parser for ISO EBNF.
36;;
37;; See ebnf2ps.el for documentation.
38;;
39;;
40;; ISO EBNF Syntax
41;; ---------------
42;;
43;;	See the URL:
44;;	`http://www.cl.cam.ac.uk/~mgk25/iso-ebnf.html'
45;;	("International Standard of the ISO EBNF Notation").
46;;
47;;
48;; ISO EBNF = syntax rule, {syntax rule};
49;;
50;; syntax rule = meta identifier, '=', definition list, ';';
51;;
52;; definition list = single definition, {'|', single definition};
53;;
54;; single definition = term, {',', term};
55;;
56;; term = factor, ['-', exception];
57;;
58;; exception = factor (* without <meta identifier> *);
59;;
60;; factor = [integer, '*'], primary;
61;;
62;; primary = optional sequence | repeated sequence | special sequence
63;;         | grouped sequence | meta identifier | terminal string
64;;         | empty;
65;;
66;; empty = ;
67;;
68;; optional sequence = '[', definition list, ']';
69;;
70;; repeated sequence = '{', definition list, '}';
71;;
72;; grouped sequence = '(', definition list, ')';
73;;
74;; terminal string = "'", character - "'", {character - "'"}, "'"
75;;                 | '"', character - '"', {character - '"'}, '"';
76;;
77;; special sequence = '?', {character - '?'}, '?';
78;;
79;; meta identifier = letter, { letter | decimal digit | ' ' };
80;;
81;; integer = decimal digit, {decimal digit};
82;;
83;; comment = '(*', {comment symbol}, '*)';
84;;
85;; comment symbol = comment (* <== NESTED COMMENT *)
86;;                | terminal string | special sequence | character;
87;;
88;; letter = ? A-Z a-z ?;
89;;
90;; decimal digit = ? 0-9 ?;
91;;
92;; character = letter | decimal digit
93;;           | ',' | '=' | '|' | '/' | '!' | '*' | '(' | ')' | '[' | ']' | '{'
94;;           | '}' | "'" | '"' | '?' | '-' | ';' | '.' | ' ' | ':' | '+' | '_'
95;;           | '%' | '@' | '&' | '#' | '$' | '<' | '>' | '\' | '^' | '`' | '~';
96;;
97;;
98;; There is also the following alternative representation:
99;;
100;; STANDARD   ALTERNATIVE
101;;    |    ==>   / or !
102;;    [    ==>   (/
103;;    ]    ==>   /)
104;;    {    ==>   (:
105;;    }    ==>   :)
106;;    ;    ==>   .
107;;
108;;
109;; Differences Between ISO EBNF And ebnf2ps ISO EBNF
110;; -------------------------------------------------
111;;
112;; ISO EBNF accepts the characters given by <character> production above,
113;; HORIZONTAL TAB (^I), VERTICAL TAB (^K), NEWLINE (^J or ^M) and FORM FEED
114;; (^L), any other characters are invalid.  But ebnf2ps accepts also the
115;; european 8-bit accentuated characters (from \240 to \377) and underscore
116;; (_).
117;;
118;;
119;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
120
121;;; Code:
122
123
124(require 'ebnf-otz)
125
126
127(defvar ebnf-iso-lex nil
128  "Value returned by `ebnf-iso-lex' function.")
129
130
131(defvar ebnf-no-meta-identifier nil
132  "Used by `ebnf-iso-term' and `ebnf-iso-lex' functions.")
133
134
135;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
136;; Syntactic analyzer
137
138
139;;; ISO EBNF = syntax rule, {syntax rule};
140
141(defun ebnf-iso-parser (start)
142  "ISO EBNF parser."
143  (let ((total (+ (- ebnf-limit start) 1))
144	(bias (1- start))
145	(origin (point))
146	syntax-list token rule)
147    (goto-char start)
148    (setq token (ebnf-iso-lex))
149    (and (eq token 'end-of-input)
150	 (error "Invalid ISO EBNF file format"))
151    (while (not (eq token 'end-of-input))
152      (ebnf-message-float
153       "Parsing...%s%%"
154       (/ (* (- (point) bias) 100.0) total))
155      (setq token (ebnf-iso-syntax-rule token)
156	    rule  (cdr token)
157	    token (car token))
158      (or (ebnf-add-empty-rule-list rule)
159	  (setq syntax-list (cons rule syntax-list))))
160    (goto-char origin)
161    syntax-list))
162
163
164;;; syntax rule = meta identifier, '=', definition list, ';';
165
166(defun ebnf-iso-syntax-rule (token)
167  (let ((header ebnf-iso-lex)
168	(action ebnf-action)
169	body)
170    (setq ebnf-action nil)
171    (or (eq token 'non-terminal)
172	(error "Invalid meta identifier syntax rule"))
173    (or (eq (ebnf-iso-lex) 'equal)
174	(error "Invalid syntax rule: missing `='"))
175    (setq body (ebnf-iso-definition-list))
176    (or (eq (car body) 'period)
177	(error "Invalid syntax rule: missing `;' or `.'"))
178    (setq body (cdr body))
179    (ebnf-eps-add-production header)
180    (cons (ebnf-iso-lex)
181	  (ebnf-make-production header body action))))
182
183
184;;; definition list = single definition, {'|', single definition};
185
186(defun ebnf-iso-definition-list ()
187  (let (body sequence)
188    (while (eq (car (setq sequence (ebnf-iso-single-definition)))
189	       'alternative)
190      (setq sequence (cdr sequence)
191	    body     (cons sequence body)))
192    (ebnf-token-alternative body sequence)))
193
194
195;;; single definition = term, {',', term};
196
197(defun ebnf-iso-single-definition ()
198  (let (token seq term)
199    (while (and (setq term  (ebnf-iso-term (ebnf-iso-lex))
200		      token (car term)
201		      term  (cdr term))
202		(eq token 'catenate))
203      (setq seq (cons term seq)))
204    (cons token
205	  (ebnf-token-sequence (if term
206				   (cons term seq)
207				 seq)))))
208
209
210;;; term = factor, ['-', exception];
211;;;
212;;; exception = factor (* without <meta identifier> *);
213
214(defun ebnf-iso-term (token)
215  (let ((factor (ebnf-iso-factor token)))
216    (if (not (eq (car factor) 'except))
217	;; factor
218	factor
219      ;; factor - exception
220      (let ((ebnf-no-meta-identifier t))
221	(ebnf-token-except (cdr factor) (ebnf-iso-factor (ebnf-iso-lex)))))))
222
223
224;;; factor = [integer, '*'], primary;
225
226(defun ebnf-iso-factor (token)
227  (if (eq token 'integer)
228      (let ((times ebnf-iso-lex))
229	(or (eq (ebnf-iso-lex) 'repeat)
230	    (error "Missing `*'"))
231	(ebnf-token-repeat times (ebnf-iso-primary (ebnf-iso-lex))))
232    (ebnf-iso-primary token)))
233
234
235;;; primary = optional sequence | repeated sequence | special sequence
236;;;         | grouped sequence | meta identifier | terminal string
237;;;         | empty;
238;;;
239;;; empty = ;
240;;;
241;;; optional sequence = '[', definition list, ']';
242;;;
243;;; repeated sequence = '{', definition list, '}';
244;;;
245;;; grouped sequence = '(', definition list, ')';
246;;;
247;;; terminal string = "'", character - "'", {character - "'"}, "'"
248;;;                 | '"', character - '"', {character - '"'}, '"';
249;;;
250;;; special sequence = '?', {character - '?'}, '?';
251;;;
252;;; meta identifier = letter, {letter | decimal digit};
253
254(defun ebnf-iso-primary (token)
255  (let ((primary
256	 (cond
257	  ;; terminal string
258	  ((eq token 'terminal)
259	   (ebnf-make-terminal ebnf-iso-lex))
260	  ;; meta identifier
261	  ((eq token 'non-terminal)
262	   (ebnf-make-non-terminal ebnf-iso-lex))
263	  ;; special sequence
264	  ((eq token 'special)
265	   (ebnf-make-special ebnf-iso-lex))
266	  ;; grouped sequence
267	  ((eq token 'begin-group)
268	   (let ((body (ebnf-iso-definition-list)))
269	     (or (eq (car body) 'end-group)
270		 (error "Missing `)'"))
271	     (cdr body)))
272	  ;; optional sequence
273	  ((eq token 'begin-optional)
274	   (let ((body (ebnf-iso-definition-list)))
275	     (or (eq (car body) 'end-optional)
276		 (error "Missing `]' or `/)'"))
277	     (ebnf-token-optional (cdr body))))
278	  ;; repeated sequence
279	  ((eq token 'begin-zero-or-more)
280	   (let* ((body   (ebnf-iso-definition-list))
281		  (repeat (cdr body)))
282	     (or (eq (car body) 'end-zero-or-more)
283		 (error "Missing `}' or `:)'"))
284	     (ebnf-make-zero-or-more repeat)))
285	  ;; empty
286	  (t
287	   nil)
288	  )))
289    (cons (if primary
290	      (ebnf-iso-lex)
291	    token)
292	  primary)))
293
294
295;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
296;; Lexical analyzer
297
298
299(defconst ebnf-iso-token-table
300  ;; control character & 8-bit character are set to `error'
301  (let ((table (make-vector 256 'error))
302	(char ?\040))
303    ;; printable character
304    (while (< char ?\060)
305      (aset table char 'character)
306      (setq char (1+ char)))
307    ;; digits:
308    (while (< char ?\072)
309      (aset table char 'integer)
310      (setq char (1+ char)))
311    (while (< char ?\101)
312      (aset table char 'character)
313      (setq char (1+ char)))
314    ;; upper case letters:
315    (while (< char ?\133)
316      (aset table char 'non-terminal)
317      (setq char (1+ char)))
318    (while (< char ?\141)
319      (aset table char 'character)
320      (setq char (1+ char)))
321    ;; lower case letters:
322    (while (< char ?\173)
323      (aset table char 'non-terminal)
324      (setq char (1+ char)))
325    (while (< char ?\177)
326      (aset table char 'character)
327      (setq char (1+ char)))
328    ;; European 8-bit accentuated characters:
329    (setq char ?\240)
330    (while (< char ?\400)
331      (aset table char 'non-terminal)
332      (setq char (1+ char)))
333    ;; Override space characters:
334    (aset table ?\013 'space)		; [VT] vertical tab
335    (aset table ?\n   'space)		; [NL] linefeed
336    (aset table ?\r   'space)		; [CR] carriage return
337    (aset table ?\t   'space)		; [HT] horizontal tab
338    (aset table ?\    'space)		; [SP] space
339    ;; Override form feed character:
340    (aset table ?\f 'form-feed)		; [FF] form feed
341    ;; Override other lexical characters:
342    (aset table ?_  'non-terminal)
343    (aset table ?\" 'double-terminal)
344    (aset table ?\' 'single-terminal)
345    (aset table ?\? 'special)
346    (aset table ?*  'repeat)
347    (aset table ?,  'catenate)
348    (aset table ?-  'except)
349    (aset table ?=  'equal)
350    (aset table ?\) 'end-group)
351    table)
352  "Vector used to map characters to a lexical token.")
353
354
355(defun ebnf-iso-initialize ()
356  "Initialize ISO EBNF token table."
357  (if ebnf-iso-alternative-p
358      ;; Override alternative lexical characters:
359      (progn
360	(aset ebnf-iso-token-table ?\( 'left-parenthesis)
361	(aset ebnf-iso-token-table ?\[ 'character)
362	(aset ebnf-iso-token-table ?\] 'character)
363	(aset ebnf-iso-token-table ?\{ 'character)
364	(aset ebnf-iso-token-table ?\} 'character)
365	(aset ebnf-iso-token-table ?|  'character)
366	(aset ebnf-iso-token-table ?\; 'character)
367	(aset ebnf-iso-token-table ?/  'slash)
368	(aset ebnf-iso-token-table ?!  'alternative)
369	(aset ebnf-iso-token-table ?:  'colon)
370	(aset ebnf-iso-token-table ?.  'period))
371    ;; Override standard lexical characters:
372    (aset ebnf-iso-token-table ?\( 'begin-parenthesis)
373    (aset ebnf-iso-token-table ?\[ 'begin-optional)
374    (aset ebnf-iso-token-table ?\] 'end-optional)
375    (aset ebnf-iso-token-table ?\{ 'begin-zero-or-more)
376    (aset ebnf-iso-token-table ?\} 'end-zero-or-more)
377    (aset ebnf-iso-token-table ?|  'alternative)
378    (aset ebnf-iso-token-table ?\; 'period)
379    (aset ebnf-iso-token-table ?/  'character)
380    (aset ebnf-iso-token-table ?!  'character)
381    (aset ebnf-iso-token-table ?:  'character)
382    (aset ebnf-iso-token-table ?.  'character)))
383
384
385;; replace the range "\240-\377" (see `ebnf-range-regexp').
386(defconst ebnf-iso-non-terminal-chars
387  (ebnf-range-regexp " 0-9A-Za-z_" ?\240 ?\377))
388
389
390(defun ebnf-iso-lex ()
391  "Lexical analyzer for ISO EBNF.
392
393Return a lexical token.
394
395See documentation for variable `ebnf-iso-lex'."
396  (if (>= (point) ebnf-limit)
397      'end-of-input
398    (let (token)
399      ;; skip spaces and comments
400      (while (if (> (following-char) 255)
401		 (progn
402		   (setq token 'error)
403		   nil)
404	       (setq token (aref ebnf-iso-token-table (following-char)))
405	       (cond
406		((eq token 'space)
407		 (skip-chars-forward " \013\n\r\t" ebnf-limit)
408		 (< (point) ebnf-limit))
409		((or (eq token 'begin-parenthesis)
410		     (eq token 'left-parenthesis))
411		 (forward-char)
412		 (if (/= (following-char) ?*)
413		     ;; no comment
414		     nil
415		   ;; comment
416		   (ebnf-iso-skip-comment)
417		   t))
418		((eq token 'form-feed)
419		 (forward-char)
420		 (setq ebnf-action 'form-feed))
421		(t nil)
422		)))
423      (cond
424       ;; end of input
425       ((>= (point) ebnf-limit)
426	'end-of-input)
427       ;; error
428       ((eq token 'error)
429	(error "Invalid character"))
430       ;; integer
431       ((eq token 'integer)
432	(setq ebnf-iso-lex (ebnf-buffer-substring "0-9"))
433	'integer)
434       ;; special: ?special?
435       ((eq token 'special)
436	(setq ebnf-iso-lex (concat (and ebnf-special-show-delimiter "?")
437				   (ebnf-string " ->@-~" ?\? "special")
438				   (and ebnf-special-show-delimiter "?")))
439	'special)
440       ;; terminal: "string"
441       ((eq token 'double-terminal)
442	(setq ebnf-iso-lex (ebnf-string " !#-~" ?\" "terminal"))
443	'terminal)
444       ;; terminal: 'string'
445       ((eq token 'single-terminal)
446	(setq ebnf-iso-lex (ebnf-string " -&(-~" ?\' "terminal"))
447	'terminal)
448       ;; non-terminal
449       ((eq token 'non-terminal)
450	(setq ebnf-iso-lex
451	      (ebnf-iso-normalize
452	       (ebnf-trim-right
453		(ebnf-buffer-substring ebnf-iso-non-terminal-chars))))
454	(and ebnf-no-meta-identifier
455	     (error "Exception sequence should not contain a meta identifier"))
456	'non-terminal)
457       ;; begin optional, begin list or begin group
458       ((eq token 'left-parenthesis)
459	(forward-char)
460	(cond ((= (following-char) ?/)
461	       (forward-char)
462	       'begin-optional)
463	      ((= (following-char) ?:)
464	       (forward-char)
465	       'begin-zero-or-more)
466	      (t
467	       'begin-group)
468	      ))
469       ;; end optional or alternative
470       ((eq token 'slash)
471	(forward-char)
472	(if (/= (following-char) ?\))
473	    'alternative
474	  (forward-char)
475	  'end-optional))
476       ;; end list
477       ((eq token 'colon)
478	(forward-char)
479	(if (/= (following-char) ?\))
480	    'character
481	  (forward-char)
482	  'end-zero-or-more))
483       ;; begin group
484       ((eq token 'begin-parenthesis)
485	'begin-group)
486       ;; miscellaneous
487       (t
488	(forward-char)
489	token)
490       ))))
491
492
493;; replace the range "\177-\237" (see `ebnf-range-regexp').
494(defconst ebnf-iso-comment-chars
495  (ebnf-range-regexp "^*(\000-\010\016-\037" ?\177 ?\237))
496
497
498(defun ebnf-iso-skip-comment ()
499  (forward-char)
500  (cond
501   ;; open EPS file
502   ((and ebnf-eps-executing (= (following-char) ?\[))
503    (ebnf-eps-add-context (ebnf-iso-eps-filename)))
504   ;; close EPS file
505   ((and ebnf-eps-executing (= (following-char) ?\]))
506    (ebnf-eps-remove-context (ebnf-iso-eps-filename)))
507   ;; any other action in comment
508   (t
509    (setq ebnf-action (aref ebnf-comment-table (following-char))))
510   )
511  (let ((pair 1))
512    (while (> pair 0)
513      (skip-chars-forward ebnf-iso-comment-chars ebnf-limit)
514      (cond ((>= (point) ebnf-limit)
515	     (error "Missing end of comment: `*)'"))
516	    ((= (following-char) ?*)
517	     (skip-chars-forward "*" ebnf-limit)
518	     (when (= (following-char) ?\))
519	       ;; end of comment
520	       (forward-char)
521	       (setq pair (1- pair))))
522	    ((= (following-char) ?\()
523	     (skip-chars-forward "(" ebnf-limit)
524	     (when (= (following-char) ?*)
525	       ;; beginning of comment
526	       (forward-char)
527	       (setq pair (1+ pair))))
528	    (t
529	     (error "Invalid character"))
530	    ))))
531
532
533(defun ebnf-iso-eps-filename ()
534  (forward-char)
535  (buffer-substring-no-properties
536   (point)
537   (let ((chars (concat ebnf-iso-comment-chars "\n"))
538	 found)
539     (while (not found)
540       (skip-chars-forward chars ebnf-limit)
541       (setq found
542	     (cond ((>= (point) ebnf-limit)
543		    (point))
544		   ((= (following-char) ?*)
545		    (skip-chars-forward "*" ebnf-limit)
546		    (if (/= (following-char) ?\))
547			nil
548		      (backward-char)
549		      (point)))
550		   ((= (following-char) ?\()
551		    (forward-char)
552		    (if (/= (following-char) ?*)
553			nil
554		      (backward-char)
555		      (point)))
556		   (t
557		    (point))
558		   )))
559     found)))
560
561
562(defun ebnf-iso-normalize (str)
563  (if (not ebnf-iso-normalize-p)
564      str
565    (let ((len (length str))
566	  (stri 0)
567	  (spaces 0))
568      ;; count exceeding spaces
569      (while (< stri len)
570	(if (/= (aref str stri) ?\ )
571	    (setq stri (1+ stri))
572	  (setq stri (1+ stri))
573	  (while (and (< stri len) (= (aref str stri) ?\ ))
574	    (setq stri   (1+ stri)
575		  spaces (1+ spaces)))))
576      (if (zerop spaces)
577	  ;; no exceeding space
578	  str
579	;; at least one exceeding space
580	(let ((new (make-string (- len spaces) ?\ ))
581	      (newi 0))
582	  ;; eliminate exceeding spaces
583	  (setq stri 0)
584	  (while (> spaces 0)
585	    (if (/= (aref str stri) ?\ )
586		(progn
587		  (aset new newi (aref str stri))
588		  (setq stri (1+ stri)
589			newi (1+ newi)))
590	      (aset new newi (aref str stri))
591	      (setq stri (1+ stri)
592		    newi (1+ newi))
593	      (while (and (> spaces 0) (= (aref str stri) ?\ ))
594		(setq stri   (1+ stri)
595		      spaces (1- spaces)))))
596	  ;; remaining is normalized
597	  (while (< stri len)
598	    (aset new newi (aref str stri))
599	    (setq stri (1+ stri)
600		  newi (1+ newi)))
601	  new)))))
602
603
604;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
605
606
607(provide 'ebnf-iso)
608
609
610;;; arch-tag: 03315eef-8f64-404a-bf9d-256d42442ee3
611;;; ebnf-iso.el ends here
612