1
2[section {PEG Specification Language}]
3
4[include whatis_peg.inc]
5[para]
6
7The PEG specification language is formally specified by the grammar
8shown below, which is itself written in that language. For a tutorial
9introduction to the language, please read the [manpage {PEG Language Tutorial}].
10
11[para]
12[example {
13PEG pe-grammar-for-peg (Grammar)
14
15	# --------------------------------------------------------------------
16        # Syntactical constructs
17
18        Grammar         <- WHITESPACE Header Definition* Final EOF ;
19
20        Header          <- PEG Identifier StartExpr ;
21        Definition      <- Attribute? Identifier IS Expression SEMICOLON ;
22        Attribute       <- (VOID / LEAF) COLON ;
23        Expression      <- Sequence (SLASH Sequence)* ;
24        Sequence        <- Prefix+ ;
25        Prefix          <- (AND / NOT)? Suffix ;
26        Suffix          <- Primary (QUESTION / STAR / PLUS)? ;
27        Primary         <- ALNUM / ALPHA / ASCII / CONTROL / DDIGIT / DIGIT
28                        /  GRAPH / LOWER / PRINTABLE / PUNCT / SPACE / UPPER
29                        /  WORDCHAR / XDIGIT
30                        / Identifier
31                        /  OPEN Expression CLOSE
32                        /  Literal
33                        /  Class
34                        /  DOT
35                        ;
36        Literal         <- APOSTROPH  (!APOSTROPH  Char)* APOSTROPH  WHITESPACE
37                        /  DAPOSTROPH (!DAPOSTROPH Char)* DAPOSTROPH WHITESPACE ;
38        Class           <- OPENB (!CLOSEB Range)* CLOSEB WHITESPACE ;
39        Range           <- Char TO Char / Char ;
40
41        StartExpr       <- OPEN Expression CLOSE ;
42void:   Final           <- END SEMICOLON WHITESPACE ;
43
44        # --------------------------------------------------------------------
45        # Lexing constructs
46
47        Identifier      <- Ident WHITESPACE ;
48leaf:   Ident           <- ('_' / ':' / <alpha>) ('_' / ':' / <alnum>)* ;
49        Char            <- CharSpecial / CharOctalFull / CharOctalPart
50                        /  CharUnicode / CharUnescaped
51                        ;
52
53leaf:   CharSpecial     <- "\\" [nrt'"\[\]\\] ;
54leaf:   CharOctalFull   <- "\\" [0-2][0-7][0-7] ;
55leaf:   CharOctalPart   <- "\\" [0-7][0-7]? ;
56leaf:   CharUnicode     <- "\\" 'u' HexDigit (HexDigit (HexDigit HexDigit?)?)? ;
57leaf:   CharUnescaped   <- !"\\" . ;
58
59void:   HexDigit        <- [0-9a-fA-F] ;
60
61void:   TO              <- '-'           ;
62void:   OPENB           <- "["           ;
63void:   CLOSEB          <- "]"           ;
64void:   APOSTROPH       <- "'"           ;
65void:   DAPOSTROPH      <- '"'           ;
66void:   PEG             <- "PEG"   WHITESPACE ;
67void:   IS              <- "<-"    WHITESPACE ;
68leaf:   VOID            <- "void"  WHITESPACE ; # Implies that definition has no semantic value.
69leaf:   LEAF            <- "leaf"  WHITESPACE ; # Implies that definition yields an AST node without children.
70void:   END             <- "END"   WHITESPACE ;
71void:   SEMICOLON       <- ";"     WHITESPACE ;
72void:   COLON           <- ":"     WHITESPACE ;
73void:   SLASH           <- "/"     WHITESPACE ;
74leaf:   AND             <- "&"     WHITESPACE ;
75leaf:   NOT             <- "!"     WHITESPACE ;
76leaf:   QUESTION        <- "?"     WHITESPACE ;
77leaf:   STAR            <- "*"     WHITESPACE ;
78leaf:   PLUS            <- "+"     WHITESPACE ;
79void:   OPEN            <- "("     WHITESPACE ;
80void:   CLOSE           <- ")"     WHITESPACE ;
81leaf:   DOT             <- "."     WHITESPACE ;
82
83leaf:   ALNUM           <- "<alnum>"    WHITESPACE ;
84leaf:   ALPHA           <- "<alpha>"    WHITESPACE ;
85leaf:   ASCII           <- "<ascii>"    WHITESPACE ;
86leaf:   CONTROL         <- "<control>"  WHITESPACE ;
87leaf:   DDIGIT          <- "<ddigit>"   WHITESPACE ;
88leaf:   DIGIT           <- "<digit>"    WHITESPACE ;
89leaf:   GRAPH           <- "<graph>"    WHITESPACE ;
90leaf:   LOWER           <- "<lower>"    WHITESPACE ;
91leaf:   PRINTABLE       <- "<print>"    WHITESPACE ;
92leaf:   PUNCT           <- "<punct>"    WHITESPACE ;
93leaf:   SPACE           <- "<space>"    WHITESPACE ;
94leaf:   UPPER           <- "<upper>"    WHITESPACE ;
95leaf:   WORDCHAR        <- "<wordchar>" WHITESPACE ;
96leaf:   XDIGIT          <- "<xdigit>"   WHITESPACE ;
97
98void:   WHITESPACE      <- (" " / "\t" / EOL / COMMENT)* ;
99void:   COMMENT         <- '#' (!EOL .)* EOL ;
100void:   EOL             <- "\n\r" / "\n" / "\r" ;
101void:   EOF             <- !. ;
102
103        # --------------------------------------------------------------------
104END;
105}]
106
107[subsection Example]
108
109Our example specifies the grammar for a basic 4-operation calculator.
110
111[para]
112[include ../example/expr_peg.inc]
113[para]
114
115Using higher-level features of the notation, i.e., the character
116classes (predefined and custom), this example can be rewritten as:
117
118[para]
119[include ../example/expr_peg_compact.inc]
120[para]
121