1{-
2  SkateParser: The Skate file parser
3
4  Part of Skate: a Schema specification languge
5
6  Copyright (c) 2017, ETH Zurich.
7  All rights reserved.
8
9  This file is distributed under the terms in the attached LICENSE file.
10  If you do not find this file, copies can be found by writing to:
11  ETH Zurich D-INFK, Universit\"atstr. 6, CH-8092 Zurich. Attn: Systems Group.
12-}
13module SkateParser where
14
15import Prelude
16import Text.ParserCombinators.Parsec as Parsec
17import Text.ParserCombinators.Parsec.Expr
18import Text.ParserCombinators.Parsec.Pos
19import qualified Text.ParserCombinators.Parsec.Token as P
20import Text.ParserCombinators.Parsec.Language( javaStyle )
21import Data.Char
22import Numeric
23import Data.List
24import Text.Printf
25
26import SkateTypes
27
28
29{-
30==============================================================================
31= Helper functions
32==============================================================================
33-}
34
35{- creates a qualified identifier of the form parent.identifier -}
36make_qualified_identifer :: String -> String -> String
37make_qualified_identifer "" i = i
38make_qualified_identifer parent i = parent ++ "." ++ i
39
40{-
41==============================================================================
42= Token data types
43==============================================================================
44-}
45
46{- import data type -}
47data Import = Import String SourcePos
48
49{- Facts -}
50data FactAttrib = FactAttrib String String TypeRef SourcePos
51
52{- Flags -}
53data FlagDef = FlagDef String String Integer SourcePos
54
55{- Constants -}
56data ConstantDef = ConstantDefInt String String Integer SourcePos
57                 | ConstantDefStr String String String SourcePos
58
59{- Enumerations -}
60data EnumDef = EnumDef String String SourcePos
61
62
63{- declarations -}
64data Declaration = Fact String String [ FactAttrib ] SourcePos
65                  | Flags String String Integer [ FlagDef ] SourcePos
66                  | Constants String String TypeRef [ ConstantDef ] SourcePos
67                  | Enumeration String String [ EnumDef ] SourcePos
68                  | Namespace String String [ Declaration ] SourcePos
69                  | Section String [ Declaration ] SourcePos
70                  | Text String SourcePos
71
72{--}
73instance Show Declaration where
74    show de@(Fact i d _ _) = "Fact '" ++ i ++ "'"
75    show de@(Flags  i d _ _ _) = "Flags '" ++ i ++ "'"
76    show de@(Enumeration  i d _ _) = "Enumeration '" ++ i ++ "'"
77    show de@(Constants  i d _ _ _) = "Constants '" ++ i ++ "'"
78    show de@(Namespace  i d _ _) = "Namespace '" ++ i ++ "'"
79    show de@(Section  i _ _) = "Section '" ++ i ++ "'"
80    show de@(Text i _) = "Text Block"
81
82{- the schema -}
83data Schema = Schema String String [ Declaration ] [ String ] SourcePos
84
85
86{-
87==============================================================================
88= The Skate Token Parser
89==============================================================================
90-}
91
92{- create the Skate Lexer -}
93lexer = P.makeTokenParser (
94    javaStyle  {
95        {- list of reserved Names -}
96        P.reservedNames = [
97            "schema", "fact",
98            "flags", "flag",
99            "constants", "const",
100            "enumeration", "enum",
101            "text", "section"
102        ],
103        {- list of reserved operators -}
104        P.reservedOpNames = ["*","/","+","-"],
105
106        {- valid identifiers -}
107        P.identStart = letter,
108        P.identLetter = alphaNum,
109
110        {- Skate is not case sensitive. -}
111        P.caseSensitive = False,
112
113        {- comment start and end -}
114        P.commentStart = "/*",
115        P.commentEnd = "*/",
116        P.commentLine = "//",
117        P.nestedComments = False
118    })
119
120{- Token definitions -}
121whiteSpace = P.whiteSpace lexer
122reserved   = P.reserved lexer
123identifier = P.identifier lexer
124stringLit  = P.stringLiteral lexer
125comma      = P.comma lexer
126commaSep   = P.commaSep lexer
127commaSep1  = P.commaSep1 lexer
128parens     = P.parens lexer
129braces     = P.braces lexer
130squares    = P.squares lexer
131semiSep    = P.semiSep lexer
132symbol     = P.symbol lexer
133natural    = P.natural lexer
134integer    = try ((P.lexeme lexer) binLiteral)
135             <|> P.integer lexer
136
137{- Parsing an integer number -}
138binDigit = oneOf "01"
139binLiteral = do {
140    _ <- char '0';
141    _ <- oneOf "bB";
142    digits <- many1 binDigit;
143    let n = foldl (\x d -> 2*x + (digitToInt d)) 0 digits
144    ; seq n (return (fromIntegral n))
145}
146
147
148{------------------------------------------------------------------------------
149- Parser start point
150------------------------------------------------------------------------------}
151
152
153pErrorDescription :: String -> String -> SourcePos -> String
154pErrorDescription t i spos = " missing description of "++ t ++ " '" ++ i ++ "'"
155                            ++ " in " ++ (show spos)
156
157
158
159
160
161{------------------------------------------------------------------------------
162- Parser start point
163------------------------------------------------------------------------------}
164
165{- parse the the Skate file -}
166parse = do {
167    whiteSpace;
168    imps <- many importfacts;
169    p <- getPosition;
170    reserved "schema";
171    name <- identifier;
172    desc <- stringLit <?> pErrorDescription "schema" name p;
173    decls <- braces (many1 $ schemadecl name);
174    _ <- symbol ";" <?> " ';' missing from end of " ++ name ++ " schema def";
175    return (Schema name desc decls [i | (Import i _) <- imps] p)
176}
177
178
179{------------------------------------------------------------------------------
180- Token rules for the Schema
181------------------------------------------------------------------------------}
182
183schemadecl sn = factdecl sn  <|>  constantdecl sn   <|> flagsdecl sn    <|>
184                enumdecl sn  <|>  namespacedecl sn  <|> sectiondecl sn  <|>
185                textdecl
186
187
188{------------------------------------------------------------------------------
189- Imports
190------------------------------------------------------------------------------}
191
192importfacts = do {
193    reserved "import";
194    p <- getPosition;
195    i <- identifier <?> " required valid identifier";
196    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " import";
197    return (Import i p)
198}
199
200
201{------------------------------------------------------------------------------
202- Namespace
203------------------------------------------------------------------------------}
204
205namespacedecl parent = do {
206    reserved "namespace";
207    p <- getPosition;
208    i <- identifier;
209    d <- stringLit <?> " missing description of namespace '" ++ (make_qualified_identifer parent i) ++ "'";
210    decls <- braces (many1 $ schemadecl (make_qualified_identifer parent i));
211    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " namespace";
212    return (Namespace (make_qualified_identifer parent i) d decls p);
213}
214
215{------------------------------------------------------------------------------
216- Facts
217------------------------------------------------------------------------------}
218
219factdecl parent = do {
220    reserved "fact";
221    p <- getPosition;
222    i <- identifier;
223    d <- stringLit <?> " missing description of fact '" ++ (make_qualified_identifer parent i) ++ "'";
224    f <- braces (many1 $ factattrib parent);
225    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " fact";
226    return (Fact (make_qualified_identifer parent i) d f p)
227}
228
229factattrib parent = do {
230    p <- getPosition;
231    t <- fieldType parent;
232    i <- identifier;
233    d <- stringLit;
234    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " fact attribute";
235    return (FactAttrib i d t p)
236}
237
238
239{------------------------------------------------------------------------------
240- Flags
241------------------------------------------------------------------------------}
242
243flagsdecl parent = do {
244    reserved "flags";
245    p <- getPosition;
246    i <- identifier;
247    b <- integer;
248    d <- stringLit <?> " missing description of flags '" ++ (make_qualified_identifer parent i) ++ "'";
249    flagvals <- braces (many1 flagvals);
250    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " flags";
251    return (Flags (make_qualified_identifer parent i) d b flagvals p)
252}
253
254{- identifier = value "opt desc"; -}
255flagvals = do {
256    pos <- getPosition;
257    p <- integer;
258    i <- identifier;
259    d <- stringLit;
260    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " flag val";
261    return (FlagDef i d p pos)
262};
263
264
265{------------------------------------------------------------------------------
266- Constants
267------------------------------------------------------------------------------}
268
269
270{- constants fname "desc" {[constantvals]}; -}
271constantdecl parent = do {
272    reserved "constants";
273    p <- getPosition;
274    i <- identifier;
275    t <- fieldTypeBuiltIn;
276    d <- stringLit <?> " missing description of constants '" ++ (make_qualified_identifer parent i) ++ "'";
277    vals <- braces (many1 (constantvals t));
278    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " constants";
279    return (Constants (make_qualified_identifer parent i) d t vals p)
280}
281
282constantvals (TBuiltIn String) = constantvalsstring
283constantvals (TBuiltIn UInt8) =  constantvalsnum
284constantvals (TBuiltIn UInt16) =  constantvalsnum
285constantvals (TBuiltIn UInt32)  =  constantvalsnum
286constantvals (TBuiltIn UInt64)  =  constantvalsnum
287constantvals (TBuiltIn UIntPtr) =  constantvalsnum
288constantvals (TBuiltIn Int8) =  constantvalsnum
289constantvals (TBuiltIn Int16) =  constantvalsnum
290constantvals (TBuiltIn Int32) =  constantvalsnum
291constantvals (TBuiltIn Int64)  =  constantvalsnum
292constantvals (TBuiltIn IntPtr) =  constantvalsnum
293constantvals (TBuiltIn Size) =  constantvalsnum
294constantvals (TBuiltIn Char) =  constantvalsnum
295constantvals (TBuiltIn Bool) =  constantvalsnum
296constantvals s = error $ "Invalid constant type " ++ (show s)
297
298constantvalsnum = do {
299    p <- getPosition;
300    i <- identifier;
301    _ <- symbol "=";
302    v <- integer;
303    d <- stringLit;
304    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " constant";
305    return (ConstantDefInt i d v p)
306};
307
308constantvalsstring = do {
309    p <- getPosition;
310    i <- identifier;
311    _ <- symbol "=";
312    v <- stringLit;
313    d <- stringLit;
314    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " constant";
315    return (ConstantDefStr i d v p)
316};
317
318
319{------------------------------------------------------------------------------
320- Enumerations
321------------------------------------------------------------------------------}
322
323enumdecl parent = do {
324    reserved "enumeration";
325    p <- getPosition;
326    i <- identifier;
327    d <- stringLit <?> " missing description of enumeration '" ++ (make_qualified_identifer parent i) ++ "'";
328    enums <- braces (many1 enumdef);
329     _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " enumeration";
330    return (Enumeration (make_qualified_identifer parent i) d enums p)
331}
332
333enumdef = do {
334    p <- getPosition;
335    i <- identifier;
336    d <- stringLit;
337    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " enum item";
338    return (EnumDef i d p)
339};
340
341
342{------------------------------------------------------------------------------
343- Sections and Text blocks
344------------------------------------------------------------------------------}
345
346sectiondecl parent = do {
347    reserved "section";
348    p <- getPosition;
349    i <- stringLit;
350    decls <- braces (many1 $ schemadecl parent);
351    _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " section";
352    return (Section i decls p);
353};
354
355textdecl = do {
356    reserved "text";
357    p <- getPosition;
358    t <- braces (many1 stringLit);
359    _ <- symbol ";" <?> " ';' missing from end of text block";
360    return (Text (concat (intersperse " " t)) p);
361};
362
363
364{------------------------------------------------------------------------------
365Parsing Types
366------------------------------------------------------------------------------}
367
368fieldType p = fieldTypeFactRef p <|> fieldTypeConstRef p <|>
369              fieldTypeEnumRef p <|> fieldTypeFlagsRef p <|> fieldTypeBuiltIn
370
371fieldTypeBuiltIn = do {
372    n <- identifier;
373    return (TBuiltIn (findBuiltIntType n))
374};
375
376{- Parsing qualified identifiers -}
377qualifiedPart = do {
378    symbol ".";
379    i <- identifier;
380    return ("." ++ i);
381}
382
383qualifiedIdentiferLiteral p = do {
384    i <- identifier;
385    ids <- many qualifiedPart;
386    return (i ++ (concat ids))
387}
388
389fieldTypeFactRef p  = do {
390    reserved "fact";
391    n <- qualifiedIdentiferLiteral p;
392    return (TFact n p);
393}
394
395fieldTypeConstRef p  = do {
396    reserved "const";
397    n <- qualifiedIdentiferLiteral p;
398    return (TConstant n p);
399}
400
401fieldTypeEnumRef p  = do {
402    reserved "enum";
403    n <- qualifiedIdentiferLiteral p;
404    return (TEnum n p);
405}
406
407fieldTypeFlagsRef p = do {
408    reserved "flag";
409    n <- qualifiedIdentiferLiteral p;
410    return (TFlags n p);
411}
412