1structure ParseDoc :> ParseDoc = 2struct 3 4open Substring; 5exception ParseError of string 6 7fun I x = x; 8fun mem x [] = false 9 | mem x (y::ys) = x = y orelse mem x ys 10 11fun curry f x y = f (x,y) 12fun equal x y = (x=y) 13infix ##; 14fun (f##g) (x,y) = (f x, g y); 15 16fun butlast [] = raise Fail "butlast" 17 | butlast [x] = [] 18 | butlast (h::t) = h::butlast t; 19 20fun flat [] = [] 21 | flat ([]::rst) = flat rst 22 | flat ((h::t)::rst) = h::flat(t::rst); 23 24infixr 4 \/ 25infixr 5 /\ 26fun (f \/ g) x = f x orelse g x 27fun (f /\ g) x = f x andalso g x; 28 29fun occurs s ss = not (isEmpty (#2 (position s ss))); 30 31fun fetch_contents docfile = 32 let val istrm = TextIO.openIn docfile 33 val contents = Substring.full (TextIO.inputAll istrm) 34 val _ = TextIO.closeIn istrm 35 in contents 36 end; 37 38fun fetch str = Substring.string (fetch_contents str); 39 40(*--------------------------------------------------------------------------- 41 A HOL ".doc" file has the format 42 43 \DOC <ident> 44 45 ( \<keyword> <paragraph>* )* 46 47 \ENDDOC 48 ---------------------------------------------------------------------------*) 49 50datatype markup 51 = PARA 52 | TEXT of substring 53 | BRKT of substring 54 | EMPH of substring 55 | XMPL of substring; 56 57datatype section 58 = TYPE of substring 59 | FIELD of string * markup list 60 | SEEALSO of substring list; 61 62 63val valid_keywords = 64 Binaryset.addList(Binaryset.empty String.compare, 65 ["DOC", "ELTYPE", "BLTYPE", "TYPE", "SYNOPSIS", 66 "COMMENTS", "USES", "SEEALSO", "KEYWORDS", "DESCRIBE", 67 "FAILURE", "EXAMPLE", "ENDDOC", "LIBRARY", 68 "STRUCTURE"]) 69 70fun check_string ss = let 71 val s = Substring.string ss 72in 73 if Binaryset.member(valid_keywords, s) then 74 s 75 else 76 raise ParseError ("Unknown keyword: "^s) 77end 78 79val empty_ok = ["DOC", "KEYWORDS", "LIBRARY"] 80 81 82fun divide ss = 83 if isEmpty ss 84 then [] 85 else let val (ss1,ss2) = position "\n\\" ss 86 in 87 if isEmpty ss1 then divide (triml 2 ss2) 88 else ss1::divide (triml 2 ss2) 89 end; 90 91local val BLTYPE = Substring.full "BLTYPE" 92 val ELTYPE = Substring.full "ELTYPE" 93 val TYPEss = Substring.full "TYPE" 94 val BLTYPEsize = Substring.size ELTYPE 95in 96fun longtype_elim (l as (doc::ss1::ss2::rst)) = 97 let val (ssa,ssb) = position "BLTYPE" ss1 98 val (ssc,ssd) = position "ELTYPE" ss2 99 in if isEmpty ssa andalso isEmpty ssc 100 then doc :: full(concat[TYPEss, triml BLTYPEsize ssb]) :: rst 101 else l 102 end 103 | longtype_elim l = l 104end; 105 106fun warn s = TextIO.output(TextIO.stdErr, s) 107 108fun find_doc ss = let 109 val (ssa, ssb) = position "\\DOC" ss 110 val _ = not (isEmpty ssb) orelse raise ParseError "No \\DOC beginning entry" 111 val _ = isEmpty ssa orelse raise ParseError "Text before \\DOC" 112 val (docpart, rest) = position "\n" ss 113 val docheader = slice(docpart, 1, NONE) 114in 115 (docheader, slice(rest,1,NONE)) 116end 117 118fun to_sections ss = 119 let open Substring 120 val (docpart, rest) = find_doc ss 121 val sslist = docpart :: divide rest 122 (* butlast below drops final \enddoc *) 123 val sslist1 = longtype_elim (butlast sslist) 124 in 125 map ((check_string ## dropl Char.isSpace) o 126 splitl (not o Char.isSpace)) sslist1 127 end; 128 129(*--------------------------------------------------------------------------- 130 Divide into maximal chunks of text enclosed by braces. 131 ---------------------------------------------------------------------------*) 132 133(* skips over text until it finds the end of a brace delimited bit of text *) 134fun braces ss = let 135 fun recurse ss n = 136 case getc ss of 137 SOME(#"}", ss1) => if n = 1 then ss1 138 else recurse ss1 (n - 1) 139 | SOME(#"{", ss1) => recurse ss1 (n + 1) 140 | SOME (_, ss1) => recurse ss1 n 141 | NONE => raise ParseError "No closing brace" 142in 143 recurse ss 0 144end 145 146fun stars ss = 147 let 148 fun recurse acc ss = 149 case getc ss of 150 SOME (#"*", ss1) => SOME (String.implode (List.rev acc), ss1) 151 | SOME (#"\\", ss1) => escapedchar acc ss1 152 | SOME (c, ss1) => recurse (c::acc) ss1 153 | NONE => NONE 154 and escapedchar acc ss = 155 case getc ss of 156 SOME (#"*", ss1) => recurse (#"*" :: acc) ss1 157 | SOME (#"\\", ss1) => recurse (#"\\" :: acc) ss1 158 | _ => raise ParseError "Strange backslash in normal text" 159 in 160 recurse [] ss 161 end 162 163 164fun markup ss = 165 case CharVector.findi (fn (_,c) => c = #"*" orelse c = #"{") (string ss) of 166 NONE => [TEXT ss] 167 | SOME (i, #"{") => 168 let 169 val (ssa,ssb) = splitAt(ss,i) 170 val ssc = braces ssb 171 val (s,i,n) = base ssa 172 val (_,j,_) = base ssc 173 val chunk = substring (s,i+n+1,j-(i+n+2)) 174 in TEXT ssa 175 :: (if occurs "\n" chunk then XMPL chunk else BRKT chunk) 176 :: markup ssc 177 end 178 | SOME (i, _) => 179 let 180 val (ssa,ssb) = splitAt(ss,i) 181 in 182 case stars (slice(ssb,1,NONE)) of 183 NONE => [TEXT ss] 184 | SOME (s, rest) => TEXT ssa :: EMPH (full s) :: markup rest 185 end 186 187val paragraphs = 188 let fun para (TEXT ss) = 189 let fun insP ss = 190 let val (ssa,ssb) = position "\n\n" ss 191 in if isEmpty ssb then [TEXT ss] 192 else TEXT ssa::PARA::insP (dropl Char.isSpace ssb) 193 end 194 in insP ss 195 end 196 | para other = [other] 197 in flat o map para 198 end; 199 200 201(* Translates out escaped braces wherever they might appear. *) 202fun unescape_braces ss = let 203 fun isBrace c = c = #"{" orelse c = #"}" 204 fun recurse acc ss = let 205 val (ssa, ssb) = splitl (not o equal #"\\") ss 206 in 207 if size ssb = 0 then 208 concat (List.rev (ssa::acc)) 209 else if isPrefix "\\lbrace" ssb then 210 recurse (full "{"::ssa::acc) (triml 7 ssb) 211 else if isPrefix "\\rbrace" ssb then 212 recurse (full "}"::ssa::acc) (triml 7 ssb) 213 else 214 recurse (slice(ss,0,SOME (size ssa + 1))::acc) (triml 1 ssb) 215 end 216in 217 full (recurse [] ss) 218end 219 220fun parse_type ss = 221 let val ss' = 222 case getc (dropl Char.isSpace ss) 223 of SOME(#"{", ss1) => 224 let val ss2 = dropr Char.isSpace ss1 225 in if sub(ss2,size ss2 - 1) = #"}" then trimr 1 ss2 226 else raise ParseError "Closing brace not found in \\TYPE" 227 end 228 | other => ss 229 in unescape_braces ss' 230 end 231 232fun trimws mlist = 233 case mlist of 234 [] => [] 235 | [TEXT ss] => let val newss = dropr Char.isSpace ss 236 in 237 if size newss > 0 then [TEXT newss] else [] 238 end 239 | (x::xs) => x :: trimws xs 240 241 242 243(* if the firstpass has an empty DOC component, then give it a full one 244 generated from the name of the file. *) 245fun name_from_fname fname = let 246 val ss0 = full (OS.Path.file fname) 247 val (ss1,_) = position ".doc" ss0 248in 249 case tokens (equal #".") (full (Symbolic.tosymb (string ss1))) of 250 [] => raise Fail "Can't happen" 251 | [x] => x 252 | (_::y::_) => y 253end 254 255fun install_doc_part fname sections = 256 case sections of 257 FIELD("DOC", []) :: ss => 258 FIELD("DOC", [TEXT (name_from_fname fname)]) :: ss 259 | FIELD("DOC", [TEXT m]) :: ss => sections 260 | _ => raise ParseError "Ill-formed \\DOC section" 261 262(* if there isn't a STRUCTURE section, then add one if the filename of the 263 docfile suggests that there should be. *) 264fun install_structure_part fname sections = let 265 fun has_struct_section slist = 266 case slist of 267 [] => false 268 | FIELD("STRUCTURE", m)::_ => 269 (case m of 270 [TEXT ss] => if length (tokens Char.isSpace ss) <> 1 then 271 raise ParseError "Multi-word \\STRUCTURE section" 272 else true 273 | _ => raise ParseError "Ill-formed \\STRUCTURE section") 274 | _::t => has_struct_section t 275 fun insert3 x [] = [x] 276 | insert3 x [y] = [y, x] 277 | insert3 x (h1::h2::t) = h1::h2::x::t 278 val name_parts = String.tokens (equal #".") (#file (OS.Path.splitDirFile fname)) 279in 280 if not (has_struct_section sections) andalso length name_parts = 3 281 then 282 insert3 (FIELD("STRUCTURE", [TEXT (full (hd name_parts))])) sections 283 else sections 284end 285 286 287fun check_char_markup m = let 288 fun illegal_char c = c = #"{" orelse c = #"}" orelse c = #"\\" 289in 290 case m of 291 TEXT ss => let 292 val (ssa, ssb) = splitl (not o illegal_char) ss 293 in 294 if not (isEmpty ssb) then let 295 val (s0, j, _) = base ssb 296 val lo = if j > 5 then j - 5 else 0 297 val hi = if j + 5 > String.size s0 then NONE else SOME (j + 5 - lo) 298 in 299 raise ParseError ("Illegal character "^str (sub(ssb,0))^ " in "^ 300 "context: "^string (extract(s0,lo,hi))) 301 end 302 else () 303 end 304 | _ => () 305end 306 307 308fun check_char_section s = 309 case s of 310 FIELD (fname, mlist) => if fname <> "DOC" then 311 List.app check_char_markup mlist 312 else () 313 | _ => () 314 315fun final_char_check slist = (List.app check_char_section slist; slist) 316 317(* check that the second field is the "TYPE" one *) 318fun check_type_field2 [] = raise ParseError "Empty field list!" 319 | check_type_field2 [x] = raise ParseError "Only one field!" 320 | check_type_field2 (x as (_::TYPE _::_)) = x 321 | check_type_field2 _ = raise ParseError "\\TYPE field not second" 322 323fun parse_file docfile = let 324 fun db_out (BRKT ss) = BRKT (unescape_braces ss) 325 | db_out (XMPL ss) = XMPL (unescape_braces ss) 326 | db_out otherwise = otherwise 327 328 fun section (tag, ss) = 329 case tag of 330 "TYPE" => TYPE (parse_type ss) 331 | "SEEALSO" => let 332 val args = tokens (Char.isSpace \/ equal #",") 333 (dropr (Char.isSpace \/ equal #".") ss) 334 in 335 if null args then 336 raise ParseError "Empty SEEALSO list" 337 else 338 SEEALSO args 339 end 340 | otherwise => let 341 val args = trimws (List.map db_out (paragraphs (markup ss))) 342 in 343 if null args andalso not (mem tag empty_ok) then 344 raise ParseError ("Empty "^tag^" field") 345 else FIELD (tag, args) 346 end 347 val firstpass = List.map section (to_sections (fetch_contents docfile)) 348 val finalisation = final_char_check o 349 check_type_field2 o 350 install_structure_part docfile o 351 install_doc_part docfile 352in 353 finalisation firstpass 354end handle ParseError s => raise ParseError (docfile^": "^s) 355 | x => (warn ("Exception raised in "^docfile^"\n"); raise x) 356 357(* ---------------------------------------------------------------------- 358 File handling routines 359 ---------------------------------------------------------------------- *) 360 361open String 362fun stripdoc_suff s = String.extract(s, 0, SOME (size s - 4)) 363fun hasdoc_suff s = 364 String.extract(s, size s - 4, NONE) = ".doc" 365 handle Subscript => false 366 367fun valid_doc_name s = hasdoc_suff s 368 369fun core_dname dnm = let 370 val toks = String.tokens (equal #".") dnm 371in 372 if length toks = 2 then hd (tl toks) else hd toks 373end 374 375fun structpart dnm = hd (String.tokens (equal #".") dnm) 376 377(* returns a set of file-names from the given directory that are .doc 378 files *) 379fun find_docfiles dirname = let 380 val dirstr = OS.FileSys.openDir dirname 381 fun name_compare(s1,s2) = let 382 (* names already less .doc suffix *) 383 val lower = String.map Char.toLower 384 val s1tok = lower (core_dname (Symbolic.tosymb s1)) 385 val s2tok = lower (core_dname (Symbolic.tosymb s2)) 386 in 387 case String.compare (s1tok, s2tok) of 388 EQUAL => String.compare(lower (structpart s1), lower (structpart s2)) 389 | x => x 390 end 391 fun insert s t = 392 if valid_doc_name s then Binaryset.add(t, stripdoc_suff s) 393 else t 394 fun loop acc = 395 case OS.FileSys.readDir dirstr of 396 SOME s => loop (insert s acc) 397 | NONE => (OS.FileSys.closeDir dirstr; acc) 398in 399 loop (Binaryset.empty name_compare) 400end 401 402 403end 404