1{- 2 SkateParser: The Skate file parser 3 4 Part of Skate: a Schema specification languge 5 6 Copyright (c) 2017, ETH Zurich. 7 All rights reserved. 8 9 This file is distributed under the terms in the attached LICENSE file. 10 If you do not find this file, copies can be found by writing to: 11 ETH Zurich D-INFK, Universit\"atstr. 6, CH-8092 Zurich. Attn: Systems Group. 12-} 13module SkateParser where 14 15import Prelude 16import Text.ParserCombinators.Parsec as Parsec 17import Text.ParserCombinators.Parsec.Expr 18import Text.ParserCombinators.Parsec.Pos 19import qualified Text.ParserCombinators.Parsec.Token as P 20import Text.ParserCombinators.Parsec.Language( javaStyle ) 21import Data.Char 22import Numeric 23import Data.List 24import Text.Printf 25 26import SkateTypes 27 28 29{- 30============================================================================== 31= Helper functions 32============================================================================== 33-} 34 35{- creates a qualified identifier of the form parent.identifier -} 36make_qualified_identifer :: String -> String -> String 37make_qualified_identifer "" i = i 38make_qualified_identifer parent i = parent ++ "." ++ i 39 40{- 41============================================================================== 42= Token data types 43============================================================================== 44-} 45 46{- import data type -} 47data Import = Import String SourcePos 48 49{- Facts -} 50data FactAttrib = FactAttrib String String TypeRef SourcePos 51 52{- Flags -} 53data FlagDef = FlagDef String String Integer SourcePos 54 55{- Constants -} 56data ConstantDef = ConstantDefInt String String Integer SourcePos 57 | ConstantDefStr String String String SourcePos 58 59{- Enumerations -} 60data EnumDef = EnumDef String String SourcePos 61 62 63{- declarations -} 64data Declaration = Fact String String [ FactAttrib ] SourcePos 65 | Flags String String Integer [ FlagDef ] SourcePos 66 | Constants String String TypeRef [ ConstantDef ] SourcePos 67 | Enumeration String String [ EnumDef ] SourcePos 68 | Namespace String String [ Declaration ] SourcePos 69 | Section String [ Declaration ] SourcePos 70 | Text String SourcePos 71 72{--} 73instance Show Declaration where 74 show de@(Fact i d _ _) = "Fact '" ++ i ++ "'" 75 show de@(Flags i d _ _ _) = "Flags '" ++ i ++ "'" 76 show de@(Enumeration i d _ _) = "Enumeration '" ++ i ++ "'" 77 show de@(Constants i d _ _ _) = "Constants '" ++ i ++ "'" 78 show de@(Namespace i d _ _) = "Namespace '" ++ i ++ "'" 79 show de@(Section i _ _) = "Section '" ++ i ++ "'" 80 show de@(Text i _) = "Text Block" 81 82{- the schema -} 83data Schema = Schema String String [ Declaration ] [ String ] SourcePos 84 85 86{- 87============================================================================== 88= The Skate Token Parser 89============================================================================== 90-} 91 92{- create the Skate Lexer -} 93lexer = P.makeTokenParser ( 94 javaStyle { 95 {- list of reserved Names -} 96 P.reservedNames = [ 97 "schema", "fact", 98 "flags", "flag", 99 "constants", "const", 100 "enumeration", "enum", 101 "text", "section" 102 ], 103 {- list of reserved operators -} 104 P.reservedOpNames = ["*","/","+","-"], 105 106 {- valid identifiers -} 107 P.identStart = letter, 108 P.identLetter = alphaNum, 109 110 {- Skate is not case sensitive. -} 111 P.caseSensitive = False, 112 113 {- comment start and end -} 114 P.commentStart = "/*", 115 P.commentEnd = "*/", 116 P.commentLine = "//", 117 P.nestedComments = False 118 }) 119 120{- Token definitions -} 121whiteSpace = P.whiteSpace lexer 122reserved = P.reserved lexer 123identifier = P.identifier lexer 124stringLit = P.stringLiteral lexer 125comma = P.comma lexer 126commaSep = P.commaSep lexer 127commaSep1 = P.commaSep1 lexer 128parens = P.parens lexer 129braces = P.braces lexer 130squares = P.squares lexer 131semiSep = P.semiSep lexer 132symbol = P.symbol lexer 133natural = P.natural lexer 134integer = try ((P.lexeme lexer) binLiteral) 135 <|> P.integer lexer 136 137{- Parsing an integer number -} 138binDigit = oneOf "01" 139binLiteral = do { 140 _ <- char '0'; 141 _ <- oneOf "bB"; 142 digits <- many1 binDigit; 143 let n = foldl (\x d -> 2*x + (digitToInt d)) 0 digits 144 ; seq n (return (fromIntegral n)) 145} 146 147 148{------------------------------------------------------------------------------ 149- Parser start point 150------------------------------------------------------------------------------} 151 152 153pErrorDescription :: String -> String -> SourcePos -> String 154pErrorDescription t i spos = " missing description of "++ t ++ " '" ++ i ++ "'" 155 ++ " in " ++ (show spos) 156 157 158 159 160 161{------------------------------------------------------------------------------ 162- Parser start point 163------------------------------------------------------------------------------} 164 165{- parse the the Skate file -} 166parse = do { 167 whiteSpace; 168 imps <- many importfacts; 169 p <- getPosition; 170 reserved "schema"; 171 name <- identifier; 172 desc <- stringLit <?> pErrorDescription "schema" name p; 173 decls <- braces (many1 $ schemadecl name); 174 _ <- symbol ";" <?> " ';' missing from end of " ++ name ++ " schema def"; 175 return (Schema name desc decls [i | (Import i _) <- imps] p) 176} 177 178 179{------------------------------------------------------------------------------ 180- Token rules for the Schema 181------------------------------------------------------------------------------} 182 183schemadecl sn = factdecl sn <|> constantdecl sn <|> flagsdecl sn <|> 184 enumdecl sn <|> namespacedecl sn <|> sectiondecl sn <|> 185 textdecl 186 187 188{------------------------------------------------------------------------------ 189- Imports 190------------------------------------------------------------------------------} 191 192importfacts = do { 193 reserved "import"; 194 p <- getPosition; 195 i <- identifier <?> " required valid identifier"; 196 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " import"; 197 return (Import i p) 198} 199 200 201{------------------------------------------------------------------------------ 202- Namespace 203------------------------------------------------------------------------------} 204 205namespacedecl parent = do { 206 reserved "namespace"; 207 p <- getPosition; 208 i <- identifier; 209 d <- stringLit <?> " missing description of namespace '" ++ (make_qualified_identifer parent i) ++ "'"; 210 decls <- braces (many1 $ schemadecl (make_qualified_identifer parent i)); 211 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " namespace"; 212 return (Namespace (make_qualified_identifer parent i) d decls p); 213} 214 215{------------------------------------------------------------------------------ 216- Facts 217------------------------------------------------------------------------------} 218 219factdecl parent = do { 220 reserved "fact"; 221 p <- getPosition; 222 i <- identifier; 223 d <- stringLit <?> " missing description of fact '" ++ (make_qualified_identifer parent i) ++ "'"; 224 f <- braces (many1 $ factattrib parent); 225 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " fact"; 226 return (Fact (make_qualified_identifer parent i) d f p) 227} 228 229factattrib parent = do { 230 p <- getPosition; 231 t <- fieldType parent; 232 i <- identifier; 233 d <- stringLit; 234 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " fact attribute"; 235 return (FactAttrib i d t p) 236} 237 238 239{------------------------------------------------------------------------------ 240- Flags 241------------------------------------------------------------------------------} 242 243flagsdecl parent = do { 244 reserved "flags"; 245 p <- getPosition; 246 i <- identifier; 247 b <- integer; 248 d <- stringLit <?> " missing description of flags '" ++ (make_qualified_identifer parent i) ++ "'"; 249 flagvals <- braces (many1 flagvals); 250 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " flags"; 251 return (Flags (make_qualified_identifer parent i) d b flagvals p) 252} 253 254{- identifier = value "opt desc"; -} 255flagvals = do { 256 pos <- getPosition; 257 p <- integer; 258 i <- identifier; 259 d <- stringLit; 260 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " flag val"; 261 return (FlagDef i d p pos) 262}; 263 264 265{------------------------------------------------------------------------------ 266- Constants 267------------------------------------------------------------------------------} 268 269 270{- constants fname "desc" {[constantvals]}; -} 271constantdecl parent = do { 272 reserved "constants"; 273 p <- getPosition; 274 i <- identifier; 275 t <- fieldTypeBuiltIn; 276 d <- stringLit <?> " missing description of constants '" ++ (make_qualified_identifer parent i) ++ "'"; 277 vals <- braces (many1 (constantvals t)); 278 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " constants"; 279 return (Constants (make_qualified_identifer parent i) d t vals p) 280} 281 282constantvals (TBuiltIn String) = constantvalsstring 283constantvals (TBuiltIn UInt8) = constantvalsnum 284constantvals (TBuiltIn UInt16) = constantvalsnum 285constantvals (TBuiltIn UInt32) = constantvalsnum 286constantvals (TBuiltIn UInt64) = constantvalsnum 287constantvals (TBuiltIn UIntPtr) = constantvalsnum 288constantvals (TBuiltIn Int8) = constantvalsnum 289constantvals (TBuiltIn Int16) = constantvalsnum 290constantvals (TBuiltIn Int32) = constantvalsnum 291constantvals (TBuiltIn Int64) = constantvalsnum 292constantvals (TBuiltIn IntPtr) = constantvalsnum 293constantvals (TBuiltIn Size) = constantvalsnum 294constantvals (TBuiltIn Char) = constantvalsnum 295constantvals (TBuiltIn Bool) = constantvalsnum 296constantvals s = error $ "Invalid constant type " ++ (show s) 297 298constantvalsnum = do { 299 p <- getPosition; 300 i <- identifier; 301 _ <- symbol "="; 302 v <- integer; 303 d <- stringLit; 304 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " constant"; 305 return (ConstantDefInt i d v p) 306}; 307 308constantvalsstring = do { 309 p <- getPosition; 310 i <- identifier; 311 _ <- symbol "="; 312 v <- stringLit; 313 d <- stringLit; 314 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " constant"; 315 return (ConstantDefStr i d v p) 316}; 317 318 319{------------------------------------------------------------------------------ 320- Enumerations 321------------------------------------------------------------------------------} 322 323enumdecl parent = do { 324 reserved "enumeration"; 325 p <- getPosition; 326 i <- identifier; 327 d <- stringLit <?> " missing description of enumeration '" ++ (make_qualified_identifer parent i) ++ "'"; 328 enums <- braces (many1 enumdef); 329 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " enumeration"; 330 return (Enumeration (make_qualified_identifer parent i) d enums p) 331} 332 333enumdef = do { 334 p <- getPosition; 335 i <- identifier; 336 d <- stringLit; 337 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " enum item"; 338 return (EnumDef i d p) 339}; 340 341 342{------------------------------------------------------------------------------ 343- Sections and Text blocks 344------------------------------------------------------------------------------} 345 346sectiondecl parent = do { 347 reserved "section"; 348 p <- getPosition; 349 i <- stringLit; 350 decls <- braces (many1 $ schemadecl parent); 351 _ <- symbol ";" <?> " ';' missing from end of " ++ i ++ " section"; 352 return (Section i decls p); 353}; 354 355textdecl = do { 356 reserved "text"; 357 p <- getPosition; 358 t <- braces (many1 stringLit); 359 _ <- symbol ";" <?> " ';' missing from end of text block"; 360 return (Text (concat (intersperse " " t)) p); 361}; 362 363 364{------------------------------------------------------------------------------ 365Parsing Types 366------------------------------------------------------------------------------} 367 368fieldType p = fieldTypeFactRef p <|> fieldTypeConstRef p <|> 369 fieldTypeEnumRef p <|> fieldTypeFlagsRef p <|> fieldTypeBuiltIn 370 371fieldTypeBuiltIn = do { 372 n <- identifier; 373 return (TBuiltIn (findBuiltIntType n)) 374}; 375 376{- Parsing qualified identifiers -} 377qualifiedPart = do { 378 symbol "."; 379 i <- identifier; 380 return ("." ++ i); 381} 382 383qualifiedIdentiferLiteral p = do { 384 i <- identifier; 385 ids <- many qualifiedPart; 386 return (i ++ (concat ids)) 387} 388 389fieldTypeFactRef p = do { 390 reserved "fact"; 391 n <- qualifiedIdentiferLiteral p; 392 return (TFact n p); 393} 394 395fieldTypeConstRef p = do { 396 reserved "const"; 397 n <- qualifiedIdentiferLiteral p; 398 return (TConstant n p); 399} 400 401fieldTypeEnumRef p = do { 402 reserved "enum"; 403 n <- qualifiedIdentiferLiteral p; 404 return (TEnum n p); 405} 406 407fieldTypeFlagsRef p = do { 408 reserved "flag"; 409 n <- qualifiedIdentiferLiteral p; 410 return (TFlags n p); 411} 412