;;; ebnf-dtd.el --- parser for DTD (Document Type Definition for XML)
2
3;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
4;;   Free Software Foundation, Inc.
5
6;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8;; Keywords: wp, ebnf, PostScript
9;; Version: 1.0
10
11;; This file is part of GNU Emacs.
12
13;; GNU Emacs is free software; you can redistribute it and/or modify
14;; it under the terms of the GNU General Public License as published by
15;; the Free Software Foundation; either version 2, or (at your option)
16;; any later version.
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
24;; along with GNU Emacs; see the file COPYING.  If not, write to the
25;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26;; Boston, MA 02110-1301, USA.
27
28;;; Commentary:
29
30;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31;;
32;;
33;; This is part of ebnf2ps package.
34;;
;; This package defines a parser for DTD (Document Type Definition for XML).
36;;
37;; See ebnf2ps.el for documentation.
38;;
39;;
40;; DTD Syntax
41;; ----------
42;;
43;;	See the URLs:
44;;	`http://www.w3.org/TR/2004/REC-xml-20040204/'
45;;	(Extensible Markup Language (XML) 1.0 (Third Edition))
46;;	`http://www.w3.org/TR/html40/'
47;;	(HTML 4.01 Specification)
48;;	`http://www.w3.org/TR/NOTE-html-970421'
49;;	(HTML DTD with support for Style Sheets)
50;;
51;;
52;; /* Document */
53;;
54;; document ::= prolog element Misc*
55;; /* Note that *only* the prolog will be parsed */
56;;
57;;
58;; /* Characters */
59;;
60;; Char ::= #x9 | #xA | #xD
61;;        | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
62;; /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
63;;
64;; /* NOTE:
65;;
66;;    Document authors are encouraged to avoid "compatibility characters", as
67;;    defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of
68;;    [Unicode3]). The characters defined in the following ranges are also
69;;    discouraged. They are either control characters or permanently undefined
70;;    Unicode characters:
71;;
72;;    [#x7F-#x84],      [#x86-#x9F],      [#xFDD0-#xFDDF],
;;    [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
;;    [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
;;    [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
;;    [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
;;    [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
;;    [#x10FFFE-#x10FFFF]. */
79;;
80;;
81;; /* White Space */
82;;
83;; S ::= (#x20 | #x9 | #xD | #xA)+
84;;
85;;
86;; /* Names and Tokens */
87;;
88;; NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
89;;            | CombiningChar | Extender
90;;
91;; Name ::= (Letter | '_' | ':') (NameChar)*
92;;
93;; Names ::= Name (#x20 Name)*
94;;
95;; Nmtoken ::= (NameChar)+
96;;
97;; Nmtokens ::= Nmtoken (#x20 Nmtoken)*
98;;
99;;
100;; /* Literals */
101;;
102;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
103;;               | "'" ([^%&'] | PEReference | Reference)* "'"
104;;
105;; AttValue ::= '"' ([^<&"] | Reference)* '"'
106;;            | "'" ([^<&'] | Reference)* "'"
107;;
108;; SystemLiteral ::= ('"' [^"]* '"')
109;;                 | ("'" [^']* "'")
110;;
111;; PubidLiteral ::= '"' PubidChar* '"'
112;;                | "'" (PubidChar - "'")* "'"
113;;
114;; PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
115;;
116;; /* NOTE:
117;;
118;;    Although the EntityValue production allows the definition of a general
119;;    entity consisting of a single explicit < in the literal (e.g., <!ENTITY
120;;    mylt "<">), it is strongly advised to avoid this practice since any
121;;    reference to that entity will cause a well-formedness error. */
122;;
123;;
124;; /* Character Data */
125;;
126;; CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
127;;
128;;
129;; /* Comments */
130;;
131;; Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
132;;
133;;
134;; /* Processing Instructions */
135;;
136;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
137;;
138;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
139;;
140;;
141;; /* CDATA Sections */
142;;
143;; CDSect ::= CDStart CData CDEnd
144;;
145;; CDStart ::= '<![CDATA['
146;;
147;; CData ::= (Char* - (Char* ']]>' Char*))
148;;
149;; CDEnd ::= ']]>'
150;;
151;;
152;; /* Prolog */
153;;
154;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
155;;
156;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
157;;
158;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
159;;
160;; Eq ::= S? '=' S?
161;;
162;; VersionNum ::= '1.0'
163;;
164;; Misc ::= Comment | PI | S
165;;
166;;
167;; /* Document Type Definition */
168;;
169;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
170;;                 ('[' intSubset ']' S?)? '>'
171;;               [VC: Root Element Type]
172;;               [WFC: External Subset]
173;;
174;; DeclSep ::= PEReference | S
175;;           [WFC: PE Between Declarations]
176;;
177;; intSubset ::= (markupdecl | DeclSep)*
178;;
179;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
180;;              | NotationDecl | PI | Comment
181;;              [VC: Proper Declaration/PE Nesting]
182;;              [WFC: PEs in Internal Subset]
183;;
184;;
185;; /* External Subset */
186;;
187;; extSubset ::= TextDecl? extSubsetDecl
188;;
189;; extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
190;;
191;;
192;; /* Standalone Document Declaration */
193;;
194;; SDDecl ::= S 'standalone' Eq
195;;            (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
196;;          [VC: Standalone Document Declaration]
197;;
198;;
199;; /* Element */
200;;
201;; element ::= EmptyElemTag | STag content ETag
202;;           [WFC: Element Type Match]
203;;           [VC: Element Valid]
204;;
205;;
206;; /* Start-tag */
207;;
208;; STag ::= '<' Name (S Attribute)* S? '>'
209;;        [WFC: Unique Att Spec]
210;;
211;; Attribute ::= Name Eq AttValue
212;;             [VC: Attribute Value Type]
213;;             [WFC: No External Entity References]
214;;             [WFC: No < in Attribute Values]
215;;
216;;
217;; /* End-tag */
218;;
219;; ETag ::= '</' Name S? '>'
220;;
221;;
222;; /* Content of Elements */
223;;
224;; content ::= CharData?
225;;             ((element | Reference | CDSect | PI | Comment) CharData?)*
226;;
227;;
228;; /* Tags for Empty Elements */
229;;
230;; EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
231;;                [WFC: Unique Att Spec]
232;;
233;;
234;; /* Element Type Declaration */
235;;
236;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
237;;               [VC: Unique Element Type Declaration]
238;;
239;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
240;;
241;;
242;; /* Element-content Models */
243;;
244;; children ::= (choice | seq) ('?' | '*' | '+')?
245;;
246;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
247;;
248;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
249;;          [VC: Proper Group/PE Nesting]
250;;
251;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
252;;       [VC: Proper Group/PE Nesting]
253;;
254;;
255;; /* Mixed-content Declaration */
256;;
257;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
258;;         | '(' S? '#PCDATA' S? ')'
259;;         [VC: Proper Group/PE Nesting]
260;;         [VC: No Duplicate Types]
261;;
262;;
263;; /* Attribute-list Declaration */
264;;
265;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
266;;
267;; AttDef ::= S Name S AttType S DefaultDecl
268;;
269;;
270;; /* Attribute Types */
271;;
272;; AttType ::= StringType | TokenizedType | EnumeratedType
273;;
274;; StringType ::= 'CDATA'
275;;
276;; TokenizedType ::= 'ID'       [VC: ID]
277;;                              [VC: One ID per Element Type]
278;;                              [VC: ID Attribute Default]
279;;                 | 'IDREF'    [VC: IDREF]
280;;                 | 'IDREFS'   [VC: IDREF]
281;;                 | 'ENTITY'   [VC: Entity Name]
282;;                 | 'ENTITIES' [VC: Entity Name]
283;;                 | 'NMTOKEN'  [VC: Name Token]
284;;                 | 'NMTOKENS' [VC: Name Token]
285;;
286;;
287;; /* Enumerated Attribute Types */
288;;
289;; EnumeratedType ::= NotationType | Enumeration
290;;
291;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
292;;                [VC: Notation Attributes]
293;;                [VC: One Notation Per Element Type]
294;;                [VC: No Notation on Empty Element]
295;;                [VC: No Duplicate Tokens]
296;;
297;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
298;;               [VC: Enumeration]
299;;               [VC: No Duplicate Tokens]
300;;
301;;
302;; /* Attribute Defaults */
303;;
304;; DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
305;;               | (('#FIXED' S)? AttValue)
306;;               [VC: Required Attribute]
307;;               [VC: Attribute Default Value Syntactically Correct]
308;;               [WFC: No < in Attribute Values]
309;;               [VC: Fixed Attribute Default]
310;;
311;;
312;; /* Conditional Section */
313;;
314;; conditionalSect ::= includeSect | ignoreSect
315;;
316;; includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
317;;               [VC: Proper Conditional Section/PE Nesting]
318;;
319;; ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
320;;              [VC: Proper Conditional Section/PE Nesting]
321;;
322;; ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
323;;
324;; Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
325;;
326;;
327;; /* Character Reference */
328;;
329;; CharRef ::= '&#' [0-9]+ ';'
330;;           | '&#x' [0-9a-fA-F]+ ';'
331;;           [WFC: Legal Character]
332;;
333;;
334;; /* Entity Reference */
335;;
336;; Reference ::= EntityRef | CharRef
337;;
338;; EntityRef ::= '&' Name ';'
339;;             [WFC: Entity Declared]
340;;             [VC: Entity Declared]
341;;             [WFC: Parsed Entity]
342;;             [WFC: No Recursion]
343;;
344;; PEReference ::= '%' Name ';'
345;;               [VC: Entity Declared]
346;;               [WFC: No Recursion]
347;;               [WFC: In DTD]
348;;
349;;
350;; /* Entity Declaration */
351;;
352;; EntityDecl ::= GEDecl | PEDecl
353;;
354;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
355;;
356;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
357;;
358;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
359;;
360;; PEDef ::= EntityValue | ExternalID
361;;
362;;
363;; /* External Entity Declaration */
364;;
365;; ExternalID ::= 'SYSTEM' S SystemLiteral
366;;              | 'PUBLIC' S PubidLiteral S SystemLiteral
367;;
368;; NDataDecl ::= S 'NDATA' S Name
369;;             [VC: Notation Declared]
370;;
371;;
372;; /* Text Declaration */
373;;
374;; TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
375;;
376;;
377;; /* Well-Formed External Parsed Entity */
378;;
379;; extParsedEnt ::= TextDecl? content
380;;
381;;
382;; /* Encoding Declaration */
383;;
384;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
385;;
386;; EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
387;; /* Encoding name contains only Latin characters */
388;;
389;;
390;; /* Notation Declarations */
391;;
392;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
393;;                [VC: Unique Notation Name]
394;;
395;; PublicID ::= 'PUBLIC' S PubidLiteral
396;;
397;;
398;; /* Characters */
399;;
400;; Letter ::= BaseChar | Ideographic
401;;
402;; BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6]
403;;            | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131]
404;;            | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E]
405;;            | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
406;;            | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1]
407;;            | #x0386          | [#x0388-#x038A] | #x038C
408;;            | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6]
409;;            | #x03DA          | #x03DC          | #x03DE
410;;            | #x03E0          | [#x03E2-#x03F3] | [#x0401-#x040C]
411;;            | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
412;;            | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC]
413;;            | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9]
414;;            | [#x0531-#x0556] | #x0559          | [#x0561-#x0586]
415;;            | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
416;;            | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE]
417;;            | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5
418;;            | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
419;;            | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990]
420;;            | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2
421;;            | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1]
422;;            | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
423;;            | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33]
424;;            | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C]
425;;            | #x0A5E          | [#x0A72-#x0A74] | [#x0A85-#x0A8B]
426;;            | #x0A8D          | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
427;;            | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9]
428;;            | #x0ABD          | #x0AE0          | [#x0B05-#x0B0C]
429;;            | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
430;;            | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D
431;;            | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A]
432;;            | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A]
433;;            | #x0B9C          | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
434;;            | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9]
435;;            | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28]
436;;            | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61]
437;;            | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
438;;            | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE
439;;            | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10]
440;;            | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61]
441;;            | [#x0E01-#x0E2E] | #x0E30          | [#x0E32-#x0E33]
442;;            | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84
443;;            | [#x0E87-#x0E88] | #x0E8A          | #x0E8D
444;;            | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3]
445;;            | #x0EA5          | #x0EA7          | [#x0EAA-#x0EAB]
446;;            | [#x0EAD-#x0EAE] | #x0EB0          | [#x0EB2-#x0EB3]
447;;            | #x0EBD          | [#x0EC0-#x0EC4] | [#x0F40-#x0F47]
448;;            | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6]
449;;            | #x1100          | [#x1102-#x1103] | [#x1105-#x1107]
450;;            | #x1109          | [#x110B-#x110C] | [#x110E-#x1112]
451;;            | #x113C          | #x113E          | #x1140
452;;            | #x114C          | #x114E          | #x1150
453;;            | [#x1154-#x1155] | #x1159          | [#x115F-#x1161]
454;;            | #x1163          | #x1165          | #x1167
455;;            | #x1169          | [#x116D-#x116E] | [#x1172-#x1173]
456;;            | #x1175          | #x119E          | #x11A8
457;;            | #x11AB          | [#x11AE-#x11AF] | [#x11B7-#x11B8]
458;;            | #x11BA          | [#x11BC-#x11C2] | #x11EB
459;;            | #x11F0          | #x11F9          | [#x1E00-#x1E9B]
460;;            | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
461;;            | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57]
462;;            | #x1F59          | #x1F5B          | #x1F5D
463;;            | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC]
464;;            | #x1FBE          | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC]
465;;            | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC]
466;;            | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
467;;            | [#x212A-#x212B] | #x212E          | [#x2180-#x2182]
468;;            | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C]
469;;            | [#xAC00-#xD7A3]
470;;
471;; Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
472;;
473;; CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486]
474;;                 | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD]
475;;                 | #x05BF          | [#x05C1-#x05C2] | #x05C4
476;;                 | [#x064B-#x0652] | #x0670          | [#x06D6-#x06DC]
477;;                 | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8]
478;;                 | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C
479;;                 | [#x093E-#x094C] | #x094D          | [#x0951-#x0954]
480;;                 | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC
481;;                 | #x09BE          | #x09BF          | [#x09C0-#x09C4]
482;;                 | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7
483;;                 | [#x09E2-#x09E3] | #x0A02          | #x0A3C
484;;                 | #x0A3E          | #x0A3F          | [#x0A40-#x0A42]
485;;                 | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71]
486;;                 | [#x0A81-#x0A83] | #x0ABC          | [#x0ABE-#x0AC5]
487;;                 | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
488;;                 | #x0B3C          | [#x0B3E-#x0B43] | [#x0B47-#x0B48]
489;;                 | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83]
490;;                 | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD]
491;;                 | #x0BD7          | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
492;;                 | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56]
493;;                 | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8]
494;;                 | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03]
495;;                 | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
496;;                 | #x0D57          | #x0E31          | [#x0E34-#x0E3A]
497;;                 | [#x0E47-#x0E4E] | #x0EB1          | [#x0EB4-#x0EB9]
498;;                 | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
499;;                 | #x0F35          | #x0F37          | #x0F39
500;;                 | #x0F3E          | #x0F3F          | [#x0F71-#x0F84]
501;;                 | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97
502;;                 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9
503;;                 | [#x20D0-#x20DC] | #x20E1          | [#x302A-#x302F]
504;;                 | #x3099          | #x309A
505;;
506;; Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9]
507;;         | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F]
508;;         | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF]
509;;         | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
510;;         | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
511;;
512;; Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6
513;;            | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
514;;
515;;
516;; NOTES
517;; -----
518;;
;; At the moment, only `<!ELEMENT' generates a syntactic chart.  The
;; `<!ATTLIST', `<!NOTATION' and `<!ENTITY' declarations are syntactically
;; checked, but they don't generate a syntactic chart.
522;;
523;; Besides the syntax above, ebnf-dtd also accepts a `pure' dtd file.  An
524;; example of a `pure' dtd file is:
525;;
526;;    <?xml version="1.0" encoding="UTF-8"?>
527;;    <!--
528;;    The main element.
529;;    -->
530;;    <!ELEMENT workflow (registers?, trigger-functions?, initial-actions,
531;;                        steps, splits?, joins?)>
532;;    <!--
533;;    An action that can be executed (id must be unique among actions for
534;;    the enclosing step).
535;;    Used in: actions
536;;    -->
537;;    <!ELEMENT action (restrict-to, validators?, pre-functions?, results,
538;;                      post-functions?)>
539;;    <!ATTLIST action
540;;    	id CDATA #REQUIRED
541;;    	name CDATA #REQUIRED
542;;    >
543;;
544;;
545;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
546
547;;; Code:
548
549
550(require 'ebnf-otz)
551
552
;; NOTE(review): presumably assigned by the lexer function `ebnf-dtd-lex',
;; which is not defined in this part of the file -- confirm.
(defvar ebnf-dtd-lex nil
  "Value associated with the token returned by function `ebnf-dtd-lex'.
The parser functions read it as a string right after the tokens `name',
`string' and `begin-pi'.")
555
556
557;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
558;; Syntactic analyzer
559
560
561;;; document ::= prolog element Misc*
562;;; /* Note that *only* the prolog will be parsed */
563
(defun ebnf-dtd-parser (start)
  "Parse the DTD text from START up to `ebnf-limit'.

Only the prolog is parsed: the XML declaration, processing instructions
and either the DOCTYPE internal subset or the declarations of a `pure'
DTD file.  Return the list of rules collected from the declarations,
most recent first, or nil when there is nothing to collect.  Point is
moved back to where it was on normal return.

Signal an error if the input is empty or if the declarations are not
properly terminated."
  (let ((total (+ (- ebnf-limit start) 1))
        (bias (1- start))
        (origin (point))
        rule-list token rule the-end)
    (goto-char start)
    (setq token (ebnf-dtd-lex))
    (and (eq token 'end-of-input)
         (error "Empty DTD file"))
    (setq token (ebnf-dtd-prolog token))
    (unless (eq (car token) 'end-prolog)
      (setq the-end (cdr token)
            token   (car token))
      ;; `ebnf-dtd-intsubset' hands `end-subset' and `end-of-input' back
      ;; without reading any further, so both must stop the loop.  Looping
      ;; only until THE-END would spin forever whenever the other
      ;; terminator shows up (e.g. a DOCTYPE whose `[' is never closed).
      (while (not (memq token '(end-subset end-of-input)))
        (ebnf-message-float
         "Parsing...%s%%"
         (/ (* (- (point) bias) 100.0) total))
        (setq token (ebnf-dtd-intsubset token)
              rule  (cdr token)
              token (car token))
        (or (null rule)
            (ebnf-add-empty-rule-list rule)
            (setq rule-list (cons rule rule-list))))
      ;; the terminator found must be the one the prolog announced
      (cond ((eq token the-end))
            ((eq the-end 'end-subset)   ; input ended inside the subset
             (error "Missing end of DOCTYPE"))
            (t                          ; subset terminator in a pure DTD
             (error "Invalid DOCTYPE element")))
      (or (eq the-end 'end-of-input)
          (eq (ebnf-dtd-lex) 'end-decl)
          (error "Missing end of DOCTYPE"))
      ;; adjust the message, because *only* the prolog is parsed
      (ebnf-message-float "Parsing...%s%%" 100.0))
    (goto-char origin)
    rule-list))
595
596
597;;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
598;;;
599;;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
600;;;
601;;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
602;;;
603;;; Eq ::= S? '=' S?
604;;;
605;;; VersionNum ::= '1.0'
606;;;
607;;; Misc ::= Comment | PI | S
608;;;
609;;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
610;;;
611;;; EncName ::= [A-Za-z] ([-A-Za-z0-9._])*
612;;; /* Encoding name contains only Latin characters */
613;;;
614;;; SDDecl ::= S 'standalone' Eq
615;;;            (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
616;;;
617;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
618;;;                 ('[' intSubset ']' S?)? '>'
619
620
(defun ebnf-dtd-prolog (token)
  "Parse the prolog that starts at TOKEN and locate the first declaration.

TOKEN is the token the caller has already read.  The optional XML
declaration and any processing instructions that follow are consumed,
then a cons (NEXT . END) is returned:

  DOCTYPE with an internal subset   (first subset token . end-subset)
  bare declaration (`pure' DTD)     (declaration token . end-of-input)
  anything else                     (end-prolog . end-subset)

Signal an error if the XML declaration or the DOCTYPE name is invalid."
  (when (and (eq token 'begin-pi) (string= ebnf-dtd-lex "xml"))
    ;; version = "1.0"
    ;; The patterns use \\` and \\' because ^ and $ would also match at
    ;; line boundaries inside the attribute value.
    (setq token (ebnf-dtd-attribute (ebnf-dtd-lex) 'version-attr
                                    "\\`1\\.0\\'" "XML version"))
    ;; ( encoding = "encoding name" )?
    (setq token (ebnf-dtd-attribute-optional
                 token 'encoding-attr
                 "\\`[A-Za-z][-A-Za-z0-9._]*\\'" "XML encoding"))
    ;; ( standalone = ( "yes" | "no" ) )?
    ;; Alternation is spelled \\| in Emacs regexps; a bare | is literal.
    (setq token (ebnf-dtd-attribute-optional
                 token 'standalone-attr
                 "\\`\\(yes\\|no\\)\\'" "XML standalone"))
    (or (eq token 'end-pi)
        (error "Missing end of XML processing instruction"))
    ;; step over the end of the XML declaration
    (setq token (ebnf-dtd-lex)))
  ;; processing instructions
  ;; When there is no XML declaration TOKEN is still unconsumed, so it is
  ;; examined here instead of being thrown away by a fresh lexer call.
  (setq token (ebnf-dtd-pi token))
  (cond
   ;; DOCTYPE
   ((eq token 'doctype-decl)
    (or (eq (ebnf-dtd-lex) 'name)
        (error "Document type name is missing"))
    (cons (if (eq (ebnf-dtd-externalid) 'begin-subset)
              (ebnf-dtd-lex)
            'end-prolog)
          'end-subset))
   ;; `pure' DTD file: declarations without an enclosing DOCTYPE
   ((memq token '(element-decl attlist-decl entity-decl notation-decl))
    (cons token 'end-of-input))
   (t
    '(end-prolog . end-subset))
   ))
652
653
(defun ebnf-dtd-attribute (token attr match attr-name)
  "Parse a mandatory attribute whose token must be ATTR.
TOKEN is the current token.  Signal an error naming ATTR-NAME when
TOKEN is not ATTR; otherwise hand TOKEN, ATTR, MATCH and ATTR-NAME to
`ebnf-dtd-attribute-optional' and return what it returns."
  (if (eq token attr)
      (ebnf-dtd-attribute-optional token attr match attr-name)
    (error "%s attribute is missing" attr-name)))
658
659
(defun ebnf-dtd-attribute-optional (token attr match attr-name)
  "Parse an optional attribute and return the token that follows it.
When TOKEN is ATTR, the next tokens must be `equal' and `string', and
the string held in variable `ebnf-dtd-lex' must match regexp MATCH;
otherwise an error naming ATTR-NAME is signaled.  The token read after
the value is returned.  When TOKEN is not ATTR, nothing is read and
TOKEN itself is returned."
  (when (eq token attr)
    (or (and (eq (ebnf-dtd-lex) 'equal)
             (eq (ebnf-dtd-lex) 'string)
             (string-match match ebnf-dtd-lex))
        ;; ATTR-NAME is used as given, like the \"missing\" message in
        ;; `ebnf-dtd-attribute'; names passed in this file already start
        ;; with \"XML\", so a fixed prefix would repeat it.
        (error "%s attribute is invalid" attr-name))
    (setq token (ebnf-dtd-lex)))
  token)
668
669
670;;; ExternalID ::= 'SYSTEM' S SystemLiteral
671;;;              | 'PUBLIC' S PubidLiteral S SystemLiteral
672
673
(defun ebnf-dtd-externalid (&optional token)
  "Parse an external identifier.
With TOKEN non-nil, that token must be `system' or `public', otherwise
an error is signaled.  With TOKEN nil, a token is read from the lexer
and simply returned when it is neither `system' nor `public'.
After `system' a system literal is parsed; after `public' a public
identifier literal and then a system literal.  In both cases the value
of `ebnf-dtd-systemliteral' is returned."
  (let ((current (or token (ebnf-dtd-lex))))
    (cond ((eq current 'public)
           (ebnf-dtd-pubidliteral)
           (ebnf-dtd-systemliteral))
          ((eq current 'system)
           (ebnf-dtd-systemliteral))
          ;; an explicitly supplied token has to start an external id
          (token
           (error "Missing `SYSTEM' or `PUBLIC' in external id"))
          (t
           current))))
686
687
688;;; SystemLiteral ::= ('"' [^"]* '"')
689;;;                 | ("'" [^']* "'")
690
691
(defun ebnf-dtd-systemliteral ()
  "Parse a system literal and return the token that follows it.
Signal an error unless the next token is `string'."
  (if (eq (ebnf-dtd-lex) 'string)
      (ebnf-dtd-lex)
    (error "System identifier is invalid")))
696
697
698;;; PubidLiteral ::= '"' PubidChar* '"'
699;;;                | "'" (PubidChar - "'")* "'"
700;;;
701;;; PubidChar ::= [-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]
702
703
(defun ebnf-dtd-pubidliteral ()
  "Parse a public identifier literal.
The next token must be `string' and the string held in variable
`ebnf-dtd-lex' must consist only of PubidChar characters; otherwise an
error is signaled."
  (or (and (eq (ebnf-dtd-lex) 'string)
           ;; \\` and \\' anchor the whole string.  The class itself
           ;; allows newlines, so with ^ and $ a single valid (or empty)
           ;; line would make a string with invalid lines match.
           (string-match "\\`[-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]*\\'"
                         ebnf-dtd-lex))
      (error "Public identifier is invalid")))
709
710
711;;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
712;;;
713;;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
714
715
(defun ebnf-dtd-pi (token)
  "Skip consecutive processing instructions starting at TOKEN.
While TOKEN is `begin-pi', check that the target held in variable
`ebnf-dtd-lex' is not `XML' in any letter case, discard tokens up to
`end-pi' and read the next token.  Return the first token that does
not start a processing instruction.

Signal an error if the target is `XML' or if the input ends before the
processing instruction is closed."
  (while (eq token 'begin-pi)
    (and (string-match "^[xX][mM][lL]$" ebnf-dtd-lex)
         (error "Processing instruction name can not be `XML'"))
    ;; Discard the instruction body.  Stop on `end-of-input' as well,
    ;; otherwise an unterminated instruction would loop forever.
    (while (not (eq (setq token (ebnf-dtd-lex)) 'end-pi))
      (and (eq token 'end-of-input)
           (error "Missing end of processing instruction")))
    (setq token (ebnf-dtd-lex)))
  token)
723
724
725;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
726;;;                 ('[' intSubset ']' S?)? '>'
727;;;
728;;; intSubset ::= (markupdecl | DeclSep)*
729;;;
730;;; DeclSep ::= PEReference | S
731;;;
732;;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
733;;;              | NotationDecl | PI | Comment
734
735
(defun ebnf-dtd-intsubset (token)
  "Parse one item of the internal subset that starts at TOKEN.
Leading processing instructions are skipped first.  Return a cons
\(NEXT . RULE) as produced by the declaration parsers; RULE is nil for
a parameter entity reference and for the terminators.  The terminators
`end-subset' and `end-of-input' are returned as NEXT without reading
any further token.  Signal an error for any other token."
  ;; PI - Processing Instruction
  (when (eq token 'begin-pi)
    (setq token (ebnf-dtd-pi token)))
  (cond
   ((eq token 'element-decl)            ; rule
    (ebnf-dtd-elementdecl))
   ((eq token 'attlist-decl)            ; annotation
    (ebnf-dtd-attlistdecl))
   ((eq token 'entity-decl)             ; annotation
    (ebnf-dtd-entitydecl))
   ((eq token 'notation-decl)           ; annotation
    (ebnf-dtd-notationdecl))
   ((eq token 'pe-ref)                  ; annotation
    (list (ebnf-dtd-lex)))
   ((memq token '(end-subset end-of-input))
    (list token))
   (t
    (error "Invalid DOCTYPE element"))))
756
757
758;;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
759;;;
760;;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
761;;;
762;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
763;;;         | '(' S? '#PCDATA' S? ')'
764;;;
765;;; children ::= (choice | seq) ('?' | '*' | '+')?
766;;;
767;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
768;;;
769;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
770;;;
771;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
772
773
(defun ebnf-dtd-elementdecl ()
  "Parse an ELEMENT declaration.
Read the element name and its content specification (`empty', `any',
mixed content or children), register the name with
`ebnf-eps-add-production' and return a cons (NEXT . PRODUCTION), where
NEXT is the token read after the declaration.  `ebnf-action' is set to
nil; its previous value is passed to `ebnf-make-production'.

Signal an error if the name, the content or the closing `>' is
missing."
  (let ((saved-action ebnf-action)
        elm-name kind content)
    (setq ebnf-action nil)
    (unless (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ELEMENT name"))
    (setq elm-name ebnf-dtd-lex
          kind     (ebnf-dtd-lex))
    ;; CONTENT is (TOKEN-AFTER-CONTENT . NODE)
    (setq content
          (cond ((eq kind 'begin-group)
                 (let ((first (ebnf-dtd-lex)))
                   (if (eq first 'pcdata)
                       (ebnf-dtd-mixed)
                     (ebnf-dtd-children first))))
                ((memq kind '(empty any))
                 ;; build the terminal before the lexer moves on
                 (let ((keyword (ebnf-make-terminal ebnf-dtd-lex)))
                   (cons (ebnf-dtd-lex) keyword)))
                (t
                 (error "Invalid ELEMENT content"))))
    (unless (eq (car content) 'end-decl)
      (error "Missing `>' in ELEMENT declaration"))
    (ebnf-eps-add-production elm-name)
    (let ((next (ebnf-dtd-lex)))
      (cons next
            (ebnf-make-production elm-name (cdr content) saved-action)))))
798
799
800;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
801;;;         | '(' S? '#PCDATA' S? ')'
802
803
(defun ebnf-dtd-mixed ()
  "Parse mixed content; the `#PCDATA' token has just been read.
Collect `#PCDATA' and every name that follows a `|' as terminals, check
the closing `)' and the `*' that must follow it when names are present,
and return the value of `ebnf-token-alternative' for those terminals
and the token read after the group.

Signal an error on an invalid name, a missing `)' or a missing `*'."
  ;; NOTE(review): the `*' is only checked; the resulting chart does not
  ;; show the repetition -- confirm whether that is intended.
  (let* ((alt             (list (ebnf-make-terminal ebnf-dtd-lex)))
         (token           (ebnf-dtd-lex))
         (has-alternative (eq token 'alternative)))
    (while (eq token 'alternative)
      (or (eq (ebnf-dtd-lex) 'name)
          (error "Invalid name"))
      ;; Wrap the name in a terminal, as `ebnf-dtd-cp' does; the
      ;; alternative list must not mix terminals with raw strings.
      (setq alt   (cons (ebnf-make-terminal ebnf-dtd-lex) alt)
            token (ebnf-dtd-lex)))
    (or (eq token 'end-group)
        (error "Missing `)'"))
    (setq token (ebnf-dtd-lex))
    (cond ((eq token 'zero-or-more)
           ;; `)*' is mandatory after names and also legal for a lone
           ;; `#PCDATA'; step over the `*' in both cases.
           (setq token (ebnf-dtd-lex)))
          (has-alternative
           (error "Missing `*'")))
    (ebnf-token-alternative alt (cons token nil))))
819
820
821;;; children ::= (choice | seq) ('?' | '*' | '+')?
822
823
(defun ebnf-dtd-children (token)
  "Parse element children: a choice or sequence group plus its operator.
TOKEN is the first token inside the group.  Return the value of
`ebnf-dtd-operators' applied to the parsed group."
  (let ((group (ebnf-dtd-choice-seq token)))
    (ebnf-dtd-operators group)))
826
827
828;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
829;;;
830;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
831
832
(defun ebnf-dtd-choice-seq (token)
  "Parse the inside of a choice or sequence group starting at TOKEN.
Content particles are read with `ebnf-dtd-cp'.  Particles separated by
`|' are combined with `ebnf-token-alternative', particles separated by
`,' with `ebnf-token-sequence'; a single particle is returned as is.
Return the resulting element.

Signal an error if the group is not closed by `)'."
  (setq token (ebnf-dtd-cp token))
  (let (elist)
    (cond
     ;; choice
     ((eq (car token) 'alternative)
      (while (eq (car token) 'alternative)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      ;; `ebnf-token-alternative' yields (TOKEN . NODE); keep only the
      ;; node so that all three branches return the same kind of value.
      (setq elist (cdr (ebnf-token-alternative elist token))))
     ;; seq
     ((eq (car token) 'comma)
      (while (eq (car token) 'comma)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      (setq elist (ebnf-token-sequence (cons (cdr token) elist))))
     ;; only one element
     (t
      (setq elist (cdr token))))
    (or (eq (car token) 'end-group)
        (error "Missing `)' in ELEMENT content"))
    elist))
855
856
857;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
858
859
(defun ebnf-dtd-cp (token)
  "Parse one content particle starting at TOKEN.
A `name' token becomes a terminal built from variable `ebnf-dtd-lex';
a `begin-group' token starts a nested choice or sequence.  Return the
value of `ebnf-dtd-operators' applied to that element.

Signal an error for any other token."
  (let ((particle
         (cond ((eq token 'begin-group)
                (ebnf-dtd-choice-seq (ebnf-dtd-lex)))
               ((eq token 'name)
                (ebnf-make-terminal ebnf-dtd-lex))
               (t
                (error "Invalid element")))))
    (ebnf-dtd-operators particle)))
868
869
870;;; elm ('?' | '*' | '+')?
871
872
(defun ebnf-dtd-operators (elm)
  "Apply the occurrence operator that follows ELM, if there is one.
Read the next token.  For `optional' (?), `zero-or-more' (*) or
`one-or-more' (+), read one more token and return it consed with ELM
wrapped by the matching constructor.  For any other token return
\(TOKEN . ELM)."
  (let* ((token   (ebnf-dtd-lex))
         (wrapper (cdr (assq token
                             '((optional     . ebnf-token-optional)
                               (zero-or-more . ebnf-make-zero-or-more)
                               (one-or-more  . ebnf-make-one-or-more))))))
    (if wrapper
        ;; the token after the operator is read before ELM is wrapped
        (cons (ebnf-dtd-lex) (funcall wrapper elm))
      (cons token elm))))
884
885
886;;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
887;;;
888;;; AttDef ::= S Name S AttType S DefaultDecl
889;;;
890;;; AttType ::= StringType | TokenizedType | EnumeratedType
891;;;
892;;; StringType ::= 'CDATA'
893;;;
894;;; TokenizedType ::= 'ID'
895;;;                 | 'IDREF'
896;;;                 | 'IDREFS'
897;;;                 | 'ENTITY'
898;;;                 | 'ENTITIES'
899;;;                 | 'NMTOKEN'
900;;;                 | 'NMTOKENS'
901;;;
902;;; EnumeratedType ::= NotationType | Enumeration
903;;;
904;;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
905;;;
906;;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
907;;;
908;;; DefaultDecl ::= '#REQUIRED'
909;;;               | '#IMPLIED'
910;;;               | (('#FIXED' S)? AttValue)
911;;;
912;;;
913;;; AttValue ::= '"' ([^<&"] | Reference)* '"'
914;;;            | "'" ([^<&'] | Reference)* "'"
915;;;
916;;; Reference ::= EntityRef | CharRef
917;;;
918;;; EntityRef ::= '&' Name ';'
919;;;
920;;; CharRef ::= '&#' [0-9]+ ';'
921;;;           | '&#x' [0-9a-fA-F]+ ';'
922
923;;; "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
924
925
(defun ebnf-dtd-attlistdecl ()
  "Parse an ATTLIST declaration and discard its contents.

The first token read must be the element name.  It is followed by any
number of attribute definitions, each made of a name, a type and a
default declaration, and finally by the closing `>'.

Return a one-element list holding the token read after the closing `>'.
Signal an error if the declaration is malformed."
  (or (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ATTLIST name"))
  (let (token)
    ;; each iteration consumes one attribute definition
    (while (eq (setq token (ebnf-dtd-lex)) 'name)
      ;; type
      (setq token (ebnf-dtd-lex))
      (cond
       ((eq token 'notation)
        (or (eq (ebnf-dtd-lex) 'begin-group)
            (error "Missing `(' in NOTATION type in ATTLIST declaration"))
        (ebnf-dtd-namelist "NOTATION" '(name)))
       ((eq token 'begin-group)
        (ebnf-dtd-namelist "enumeration" '(name name-char)))
       ;; string and tokenized types need no further parsing
       ((memq token
              '(cdata id idref idrefs entity entities nmtoken nmtokens)))
       (t
        (error "Invalid type in ATTLIST declaration")))
      ;; default value
      (setq token (ebnf-dtd-lex))
      (unless (memq token '(required implied))
        ;; `#FIXED' is optional and must be followed by a value
        (and (eq token 'fixed)
             (setq token (ebnf-dtd-lex)))
        (or (and (eq token 'string)
                 ;; The whole value must be made of references and of
                 ;; characters other than `<' and `&'.  The match is
                 ;; anchored with \` and \' (not ^ and $), so that a
                 ;; value spanning several lines is checked in full
                 ;; instead of being accepted as soon as one line
                 ;; (possibly an empty one) is valid.
                 (string-match
                  "\\`\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*\\'"
                  ebnf-dtd-lex))
            (error "Invalid default value in ATTLIST declaration"))))
    ;; the token that ended the loop must be the closing `>'
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ATTLIST"))
    (cons (ebnf-dtd-lex) nil)))
957
958
(defun ebnf-dtd-namelist (type name-list)
  "Parse a `|'-separated list of names up to the closing `)'.

NAME-LIST is the list of token symbols accepted as a name.  TYPE is a
string naming the attribute type; it is only used in error messages.

Return t.  Signal an error if an item is not in NAME-LIST or if the
list is not terminated by `)'."
  (let ((separator 'alternative))
    ;; one name, then the token that follows it; go on while that
    ;; token is `|'
    (while (eq separator 'alternative)
      (unless (memq (ebnf-dtd-lex) name-list)
        (error "Invalid name in %s type in ATTLIST declaration" type))
      (setq separator (ebnf-dtd-lex)))
    (if (eq separator 'end-group)
        t
      (error "Missing `)' in %s type in ATTLIST declaration" type))))
967
968
969;;; EntityDecl ::= GEDecl | PEDecl
970;;;
971;;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
972;;;
973;;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
974;;;
975;;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
976;;;
977;;; PEDef ::= EntityValue | ExternalID
978;;;
979;;; NDataDecl ::= S 'NDATA' S Name
980;;;
981;;;
982;;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
983;;;               | "'" ([^%&'] | PEReference | Reference)* "'"
984;;;
985;;; PEReference ::= '%' Name ';'
986;;;
987;;; Reference ::= EntityRef | CharRef
988;;;
989;;; EntityRef ::= '&' Name ';'
990;;;
991;;; CharRef ::= '&#' [0-9]+ ';'
992;;;           | '&#x' [0-9a-fA-F]+ ';'
993
994;;; "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
995
996
(defun ebnf-dtd-entitydecl ()
  "Parse an ENTITY declaration and discard its contents.

A leading `%' marks a parameter entity declaration.  The entity name is
followed either by a literal entity value or by an external
identifier; for a general entity only, the external identifier may be
followed by `NDATA' and a name.  The declaration ends with `>'.

Return a one-element list holding the token read after the closing `>'.
Signal an error if the declaration is malformed."
  (let* ((token (ebnf-dtd-lex))
         (pedecl (eq token 'percent)))
    (and pedecl
         (setq token (ebnf-dtd-lex)))
    (or (eq token 'name)
        (error "Invalid name of ENTITY"))
    (setq token (ebnf-dtd-lex))
    (if (eq token 'string)
        ;; The whole value must be made of references and of characters
        ;; other than `%' and `&'.  The match is anchored with \` and \'
        ;; (not ^ and $), so that a value spanning several lines is
        ;; checked in full instead of being accepted as soon as one
        ;; line (possibly an empty one) is valid.
        (if (string-match
             "\\`\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*\\'"
             ebnf-dtd-lex)
            (setq token (ebnf-dtd-lex))
          (error "Invalid ENTITY definition"))
      ;; not a literal value: it must be an external identifier
      (setq token (ebnf-dtd-externalid token))
      ;; `NDATA' is only accepted in a general entity declaration
      (when (and (not pedecl) (eq token 'ndata))
        (or (eq (ebnf-dtd-lex) 'name)
            (error "Invalid NDATA name"))
        (setq token (ebnf-dtd-lex))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ENTITY"))
    (cons (ebnf-dtd-lex) nil)))
1019
1020
1021;;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1022;;;
1023;;; PublicID ::= 'PUBLIC' S PubidLiteral
1024
1025
(defun ebnf-dtd-notationdecl ()
  "Parse a NOTATION declaration and discard its contents.

The notation name must be followed by an external or public
identifier and then by the closing `>'.

Return a one-element list holding the token read after the closing `>'.
Signal an error if the declaration is malformed."
  (unless (eq (ebnf-dtd-lex) 'name)
    (error "Invalid name NOTATION"))
  ;; the identifier parser hands back the token that follows it
  (let ((closing (ebnf-dtd-externalid-or-publicid)))
    (unless (eq closing 'end-decl)
      (error "Missing `>' in end of NOTATION")))
  (list (ebnf-dtd-lex)))
1032
1033
1034;;; ExternalID ::= 'SYSTEM' S SystemLiteral
1035;;;              | 'PUBLIC' S PubidLiteral S SystemLiteral
1036;;;
1037;;; PublicID ::= 'PUBLIC' S PubidLiteral
1038
1039
(defun ebnf-dtd-externalid-or-publicid ()
  "Parse an external identifier or a public identifier.

After `SYSTEM', return the value of `ebnf-dtd-systemliteral'.  After
`PUBLIC', parse the public identifier literal, skip the system literal
when one follows, and return the next token.  Signal an error if
neither keyword is found."
  (let ((keyword (ebnf-dtd-lex)))
    (if (eq keyword 'system)
        (ebnf-dtd-systemliteral)
      (unless (eq keyword 'public)
        (error "Missing `SYSTEM' or `PUBLIC'"))
      (ebnf-dtd-pubidliteral)
      ;; the system literal is optional here
      (let ((following (ebnf-dtd-lex)))
        (if (eq following 'string)
            (ebnf-dtd-lex)
          following)))))
1051
1052
1053;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1054;; Lexical analyzer
1055
1056
(defconst ebnf-dtd-token-table
  (make-vector 256 'error)
  "Vector used to map characters to a lexical token.

The vector has 256 elements and is indexed by character code.  Every
element starts out as `error'; `ebnf-dtd-initialize' stores the token
symbols of the characters it knows about.")
1059
1060
(defun ebnf-dtd-initialize ()
  "Store in `ebnf-dtd-token-table' the token of each known character.

Elements that are not assigned here, such as those of control
characters, keep the value they already have."
  ;; Contiguous ranges, as (FIRST LAST TOKEN) with both bounds included.
  (mapc (lambda (range)
          (let ((code  (car range))
                (upper (nth 1 range))
                (class (nth 2 range)))
            (while (<= code upper)
              (aset ebnf-dtd-token-table code class)
              (setq code (1+ code)))))
        '((?0    ?9    name-char)       ; digits
          (?A    ?Z    name)            ; upper case letters
          (?a    ?z    name)            ; lower case letters
          (?\240 ?\377 name)))          ; European 8-bit accented characters
  ;; Single characters, as (CHAR TOKEN).
  (mapc (lambda (entry)
          (aset ebnf-dtd-token-table (car entry) (nth 1 entry)))
        '(;; name characters
          (?_  name)
          (?:  name)
          (?.  name-char)
          (?-  name-char)
          ;; space characters
          (?\n space)                   ; [NL] linefeed
          (?\r space)                   ; [CR] carriage return
          (?\t space)                   ; [HT] horizontal tab
          (?\  space)                   ; [SP] space
          ;; other lexical characters
          (?=  equal)
          (?,  comma)
          (?*  zero-or-more)
          (?+  one-or-more)
          (?|  alternative)
          (?%  percent)
          (?&  ampersand)
          (?#  hash)
          (?\? interrogation)
          (?\" double-quote)
          (?\' single-quote)
          (?<  less-than)
          (?>  end-decl)
          (?\( begin-group)
          (?\) end-group)
          (?\[ begin-subset)
          (?\] end-subset))))
1112
1113
1114;; replace the range "\240-\377" (see `ebnf-range-regexp').
(defconst ebnf-dtd-name-chars
  (ebnf-range-regexp "-._:0-9A-Za-z"
                     ?\240
                     ?\377)
  "Characters allowed in a DTD name.

The value is computed by `ebnf-range-regexp' from the ASCII name
characters and from the character code bounds 8-bit octal 240 and 377.
NOTE(review): presumably a character set suitable for skipping over a
name in the buffer -- confirm against `ebnf-range-regexp'.")
1117
1118
(defconst ebnf-dtd-decl-alist
  '(("ATTLIST" . attlist-decl) ("DOCTYPE" . doctype-decl)
    ("ELEMENT" . element-decl) ("ENTITY" . entity-decl)
    ("NOTATION" . notation-decl))
  "Alist mapping a declaration keyword to its token symbol.

Each element has the form (KEYWORD . TOKEN), where KEYWORD is a string.
NOTE(review): presumably consulted by the lexer for the name that
follows the start of a declaration -- confirm against `ebnf-dtd-lex'.")
1125
1126
(defconst ebnf-dtd-element-alist
  '(("#FIXED" . fixed) ("#IMPLIED" . implied)
    ("#PCDATA" . pcdata) ("#REQUIRED" . required))
  "Alist mapping a keyword that starts with `#' to its token symbol.

Each element has the form (KEYWORD . TOKEN), where KEYWORD is a string.")
1132
1133
(defconst ebnf-dtd-name-alist
  '(;; content specifications
    ("ANY" . any) ("EMPTY" . empty)
    ;; attribute types
    ("CDATA" . cdata)
    ("ENTITIES" . entities) ("ENTITY" . entity)
    ("ID" . id) ("IDREF" . idref) ("IDREFS" . idrefs)
    ("NMTOKEN" . nmtoken) ("NMTOKENS" . nmtokens)
    ("NOTATION" . notation)
    ;; entity declarations and external identifiers
    ("NDATA" . ndata) ("PUBLIC" . public) ("SYSTEM" . system)
    ;; lower case attribute names
    ("encoding" . encoding-attr)
    ("standalone" . standalone-attr)
    ("version" . version-attr))
  "Alist mapping a reserved name to its token symbol.

Each element has the form (NAME . TOKEN), where NAME is a string.  No
name appears twice, so the order of the elements is not significant
for a lookup by name.")
1152
1153
1154(defun ebnf-dtd-lex ()
1155  "Lexical analyzer for DTD.
1156
1157Return a lexical token.
1158
1159See documentation for variable `ebnf-dtd-lex'."
1160  (if (>= (point) ebnf-limit)
1161      'end-of-input
1162    (let (token)
1163      ;; skip spaces and comments
1164      (while (if (> (following-char) 255)
1165		 (progn
1166		   (setq token 'error)
1167		   nil)
1168	       (setq token (aref ebnf-dtd-token-table (following-char)))
1169	       (cond
1170		((eq token 'space)
1171		 (skip-chars-forward " \n\r\t" ebnf-limit)
1172		 (< (point) ebnf-limit))
1173		((and (eq token 'less-than)
1174		      (looking-at "&project=macosx-10.10.1">-->"))))
1317    (skip-chars-forward "-" ebnf-limit))
1318  ;; check for a valid end of comment
1319  (cond ((>= (point) ebnf-limit)
1320	 nil)
1321	((looking-at "-->")
1322	 (forward-char 3)
1323	 t)
1324	(t
1325	 (error "Invalid character"))
1326	))
1327
1328
(defun ebnf-dtd-eps-filename ()
  "Read a file name from the buffer and return it as a string.

Move point forward one character, then collect runs of
`ebnf-dtd-filename-chars' characters.  A run of `-' characters is taken
as part of the name, unless point is at `ebnf-limit' or looking at the
sequence `-->', either of which ends the name."
  (forward-char)
  (let ((collected (concat (ebnf-buffer-substring ebnf-dtd-filename-chars))))
    (while (and (< (point) ebnf-limit)
                (= (following-char) ?-)
                (not (looking-at "-->")))
      ;; the dashes belong to the name: take them, then the next run
      (setq collected (concat collected (ebnf-buffer-substring "-")))
      (setq collected
            (concat collected
                    (ebnf-buffer-substring ebnf-dtd-filename-chars))))
    collected))
1341
1342
1343;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1344
1345
1346(provide 'ebnf-dtd)
1347
1348;;; arch-tag: c21bb640-135f-4afa-8712-fa11d86301c4
1349;;; ebnf-dtd.el ends here
1350