1structure ParseDoc :> ParseDoc =
2struct
3
4open Substring;
5exception ParseError of string
6
7fun I x = x;
8fun mem x [] = false
9  | mem x (y::ys) = x = y orelse mem x ys
10
11fun curry f x y = f (x,y)
12fun equal x y = (x=y)
13infix ##;
14fun (f##g) (x,y) = (f x, g y);
15
16fun butlast [] = raise Fail "butlast"
17  | butlast [x] = []
18  | butlast (h::t) = h::butlast t;
19
20fun flat [] = []
21  | flat ([]::rst) = flat rst
22  | flat ((h::t)::rst) = h::flat(t::rst);
23
24infixr 4 \/
25infixr 5 /\
26fun (f \/ g) x = f x orelse g x
27fun (f /\ g) x = f x andalso g x;
28
29fun occurs s ss = not (isEmpty (#2 (position s ss)));
30
31fun fetch_contents docfile =
32  let val istrm = TextIO.openIn docfile
33      val contents = Substring.full (TextIO.inputAll istrm)
34      val _ = TextIO.closeIn istrm
35  in contents
36  end;
37
38fun fetch str = Substring.string (fetch_contents str);
39
40(*---------------------------------------------------------------------------
41       A HOL ".doc" file has the format
42
43         \DOC <ident>
44
45         ( \<keyword> <paragraph>* )*
46
47         \ENDDOC
48 ---------------------------------------------------------------------------*)
49
50datatype markup
51   = PARA
52   | TEXT of substring
53   | BRKT of substring
54   | EMPH of substring
55   | XMPL of substring;
56
57datatype section
58   = TYPE of substring
59   | FIELD of string * markup list
60   | SEEALSO of substring list;
61
62
63val valid_keywords =
64    Binaryset.addList(Binaryset.empty String.compare,
65                      ["DOC", "ELTYPE", "BLTYPE", "TYPE", "SYNOPSIS",
66                       "COMMENTS", "USES", "SEEALSO", "KEYWORDS", "DESCRIBE",
67                       "FAILURE", "EXAMPLE", "ENDDOC", "LIBRARY",
68                       "STRUCTURE"])
69
70fun check_string ss = let
71  val s = Substring.string ss
72in
73  if Binaryset.member(valid_keywords, s) then
74    s
75  else
76    raise ParseError ("Unknown keyword: "^s)
77end
78
79val empty_ok = ["DOC", "KEYWORDS", "LIBRARY"]
80
81
82fun divide ss =
83  if isEmpty ss
84    then []
85    else let val (ss1,ss2) = position "\n\\" ss
86         in
87           if isEmpty ss1 then divide (triml 2 ss2)
88           else ss1::divide (triml 2 ss2)
89         end;
90
91local val BLTYPE = Substring.full "BLTYPE"
92      val ELTYPE = Substring.full "ELTYPE"
93      val TYPEss = Substring.full "TYPE"
94      val BLTYPEsize = Substring.size ELTYPE
95in
96fun longtype_elim (l as (doc::ss1::ss2::rst)) =
97     let val (ssa,ssb) = position "BLTYPE" ss1
98         val (ssc,ssd) = position "ELTYPE" ss2
99     in if isEmpty ssa andalso isEmpty ssc
100          then doc :: full(concat[TYPEss, triml BLTYPEsize ssb]) :: rst
101          else l
102     end
103  | longtype_elim l = l
104end;
105
106fun warn s = TextIO.output(TextIO.stdErr, s)
107
108fun find_doc ss = let
109  val (ssa, ssb) = position "\\DOC" ss
110  val _ = not (isEmpty ssb) orelse raise ParseError "No \\DOC beginning entry"
111  val _ = isEmpty ssa orelse raise ParseError "Text before \\DOC"
112  val (docpart, rest) = position "\n" ss
113  val docheader = slice(docpart, 1, NONE)
114in
115  (docheader, slice(rest,1,NONE))
116end
117
118fun to_sections ss =
119  let open Substring
120      val (docpart, rest) = find_doc ss
121      val sslist = docpart :: divide rest
122      (* butlast below drops final \enddoc *)
123      val sslist1 = longtype_elim (butlast sslist)
124  in
125   map ((check_string ## dropl Char.isSpace) o
126        splitl (not o Char.isSpace)) sslist1
127  end;
128
129(*---------------------------------------------------------------------------
130        Divide into maximal chunks of text enclosed by braces.
131 ---------------------------------------------------------------------------*)
132
133(* skips over text until it finds the end of a brace delimited bit of text *)
134fun braces ss = let
135  fun recurse ss n =
136      case getc ss of
137        SOME(#"}", ss1) => if n = 1 then ss1
138                           else recurse ss1 (n - 1)
139      | SOME(#"{", ss1) => recurse ss1 (n + 1)
140      | SOME (_, ss1) => recurse ss1 n
141      | NONE => raise ParseError "No closing brace"
142in
143  recurse ss 0
144end
145
146fun stars ss =
147  let
148    fun recurse acc ss =
149      case getc ss of
150          SOME (#"*", ss1) => SOME (String.implode (List.rev acc), ss1)
151        | SOME (#"\\", ss1) => escapedchar acc ss1
152        | SOME (c, ss1) => recurse (c::acc) ss1
153        | NONE => NONE
154    and escapedchar acc ss =
155      case getc ss of
156          SOME (#"*", ss1) => recurse (#"*" :: acc) ss1
157        | SOME (#"\\", ss1) => recurse (#"\\" :: acc) ss1
158        | _ => raise ParseError "Strange backslash in normal text"
159  in
160    recurse [] ss
161  end
162
163
164fun markup ss =
165  case CharVector.findi (fn (_,c) => c = #"*" orelse c = #"{") (string ss) of
166      NONE => [TEXT ss]
167    | SOME (i, #"{") =>
168      let
169        val (ssa,ssb) = splitAt(ss,i)
170        val ssc = braces ssb
171        val (s,i,n) = base ssa
172        val (_,j,_) = base ssc
173        val chunk = substring (s,i+n+1,j-(i+n+2))
174      in TEXT ssa
175         :: (if occurs "\n" chunk then XMPL chunk else BRKT chunk)
176         :: markup ssc
177      end
178    | SOME (i, _) =>
179      let
180        val (ssa,ssb) = splitAt(ss,i)
181      in
182        case stars (slice(ssb,1,NONE)) of
183            NONE => [TEXT ss]
184          | SOME (s, rest) => TEXT ssa :: EMPH (full s) :: markup rest
185      end
186
187val paragraphs =
188  let fun para (TEXT ss) =
189           let fun insP ss =
190                let val (ssa,ssb) = position "\n\n" ss
191                in if isEmpty ssb then [TEXT ss]
192                   else TEXT ssa::PARA::insP (dropl Char.isSpace ssb)
193                end
194           in insP ss
195           end
196        | para other = [other]
197  in flat o map para
198  end;
199
200
201(* Translates out escaped braces wherever they might appear.  *)
202fun unescape_braces ss = let
203  fun isBrace c = c = #"{" orelse c = #"}"
204  fun recurse acc ss = let
205    val (ssa, ssb) = splitl (not o equal #"\\") ss
206  in
207    if size ssb = 0 then
208      concat (List.rev (ssa::acc))
209    else if isPrefix "\\lbrace" ssb then
210      recurse (full "{"::ssa::acc) (triml 7 ssb)
211    else if isPrefix "\\rbrace" ssb then
212      recurse (full "}"::ssa::acc) (triml 7 ssb)
213    else
214      recurse (slice(ss,0,SOME (size ssa + 1))::acc) (triml 1 ssb)
215  end
216in
217  full (recurse [] ss)
218end
219
220fun parse_type ss =
221 let val ss' =
222      case getc (dropl Char.isSpace ss)
223       of SOME(#"{", ss1) =>
224           let val ss2 = dropr Char.isSpace ss1
225           in if sub(ss2,size ss2 - 1) = #"}" then trimr 1 ss2
226              else raise ParseError "Closing brace not found in \\TYPE"
227           end
228        | other => ss
229  in unescape_braces ss'
230  end
231
232fun trimws mlist =
233    case mlist of
234      [] => []
235    | [TEXT ss] => let val newss = dropr Char.isSpace ss
236                   in
237                     if size newss > 0 then [TEXT newss] else []
238                   end
239    | (x::xs) => x :: trimws xs
240
241
242
243(* if the firstpass has an empty DOC component, then give it a full one
244   generated from the name of the file. *)
245fun name_from_fname fname = let
246  val ss0 = full (OS.Path.file fname)
247  val (ss1,_) = position ".doc" ss0
248in
249  case tokens (equal #".") (full (Symbolic.tosymb (string ss1))) of
250    [] => raise Fail "Can't happen"
251  | [x] => x
252  | (_::y::_) => y
253end
254
255fun install_doc_part fname sections =
256    case sections of
257      FIELD("DOC", []) :: ss =>
258         FIELD("DOC", [TEXT (name_from_fname fname)]) :: ss
259    | FIELD("DOC", [TEXT m]) :: ss => sections
260    | _ => raise ParseError "Ill-formed \\DOC section"
261
262(* if there isn't a STRUCTURE section, then add one if the filename of the
263   docfile suggests that there should be. *)
264fun install_structure_part fname sections = let
265  fun has_struct_section slist =
266      case slist of
267        [] => false
268      | FIELD("STRUCTURE", m)::_ =>
269        (case m of
270           [TEXT ss] => if length (tokens Char.isSpace ss) <> 1 then
271                          raise ParseError "Multi-word \\STRUCTURE section"
272                        else true
273         | _ => raise ParseError "Ill-formed \\STRUCTURE section")
274    | _::t => has_struct_section t
275  fun insert3 x [] = [x]
276    | insert3 x [y] = [y, x]
277    | insert3 x (h1::h2::t) = h1::h2::x::t
278  val name_parts = String.tokens (equal #".") (#file (OS.Path.splitDirFile fname))
279in
280  if not (has_struct_section sections) andalso length name_parts = 3
281  then
282    insert3 (FIELD("STRUCTURE", [TEXT (full (hd name_parts))])) sections
283  else sections
284end
285
286
287fun check_char_markup m = let
288  fun illegal_char c = c = #"{" orelse c = #"}" orelse c = #"\\"
289in
290  case m of
291    TEXT ss => let
292      val (ssa, ssb) = splitl (not o illegal_char) ss
293    in
294      if not (isEmpty ssb) then let
295          val (s0, j, _) = base ssb
296          val lo = if j > 5 then j - 5 else 0
297          val hi = if j + 5 > String.size s0 then NONE else SOME (j + 5 - lo)
298        in
299          raise ParseError ("Illegal character "^str (sub(ssb,0))^ " in "^
300                            "context: "^string (extract(s0,lo,hi)))
301        end
302      else ()
303    end
304  | _ => ()
305end
306
307
308fun check_char_section s =
309    case s of
310      FIELD (fname, mlist) => if fname <> "DOC" then
311                                List.app check_char_markup mlist
312                              else ()
313    | _ => ()
314
315fun final_char_check slist = (List.app check_char_section slist; slist)
316
317(* check that the second field is the "TYPE" one *)
318fun check_type_field2 [] = raise ParseError "Empty field list!"
319  | check_type_field2 [x] = raise ParseError "Only one field!"
320  | check_type_field2 (x as (_::TYPE _::_)) = x
321  | check_type_field2 _ = raise ParseError "\\TYPE field not second"
322
323fun parse_file docfile = let
324  fun db_out (BRKT ss) = BRKT (unescape_braces ss)
325    | db_out (XMPL ss) = XMPL (unescape_braces ss)
326    | db_out otherwise = otherwise
327
328  fun section (tag, ss) =
329      case tag of
330        "TYPE" => TYPE (parse_type ss)
331      | "SEEALSO" => let
332          val args = tokens (Char.isSpace \/ equal #",")
333                            (dropr (Char.isSpace \/ equal #".") ss)
334        in
335          if null args then
336            raise ParseError "Empty SEEALSO list"
337          else
338            SEEALSO args
339        end
340      | otherwise => let
341          val args = trimws (List.map db_out (paragraphs (markup ss)))
342        in
343          if null args andalso not (mem tag empty_ok) then
344            raise ParseError ("Empty "^tag^" field")
345          else FIELD (tag, args)
346        end
347  val firstpass = List.map section (to_sections (fetch_contents docfile))
348  val finalisation = final_char_check o
349                     check_type_field2 o
350                     install_structure_part docfile o
351                     install_doc_part docfile
352in
353  finalisation firstpass
354end handle ParseError s => raise ParseError (docfile^": "^s)
355         | x => (warn ("Exception raised in "^docfile^"\n"); raise x)
356
357(* ----------------------------------------------------------------------
358    File handling routines
359   ---------------------------------------------------------------------- *)
360
361open String
362fun stripdoc_suff s = String.extract(s, 0, SOME (size s - 4))
363fun hasdoc_suff s =
364    String.extract(s, size s - 4, NONE) = ".doc"
365    handle Subscript => false
366
367fun valid_doc_name s = hasdoc_suff s
368
369fun core_dname dnm = let
370  val toks = String.tokens (equal #".") dnm
371in
372  if length toks = 2 then hd (tl toks) else hd toks
373end
374
375fun structpart dnm =  hd (String.tokens (equal #".") dnm)
376
377(* returns a set of file-names from the given directory that are .doc
378   files *)
379fun find_docfiles dirname = let
380  val dirstr = OS.FileSys.openDir dirname
381  fun name_compare(s1,s2) = let
382    (* names already less .doc suffix *)
383    val lower = String.map Char.toLower
384    val s1tok = lower (core_dname (Symbolic.tosymb s1))
385    val s2tok = lower (core_dname (Symbolic.tosymb s2))
386  in
387    case String.compare (s1tok, s2tok) of
388      EQUAL => String.compare(lower (structpart s1), lower (structpart s2))
389    | x => x
390  end
391  fun insert s t =
392      if valid_doc_name s then Binaryset.add(t, stripdoc_suff s)
393      else t
394  fun loop acc =
395      case OS.FileSys.readDir dirstr of
396        SOME s => loop (insert s acc)
397      | NONE => (OS.FileSys.closeDir dirstr; acc)
398in
399  loop (Binaryset.empty name_compare)
400end
401
402
403end
404