1structure Doc2Txt = 2struct 3 4open ParseDoc 5 6val pagewidth = 70; 7 8val separator = String.implode (List.tabulate(pagewidth, fn _ => #"-")) 9 10fun out(str,s) = TextIO.output(str, s) 11fun warn s = TextIO.output(TextIO.stdErr, s ^ "\n") 12fun die s = (warn s; OS.Process.exit OS.Process.failure) 13 14fun print_type strm ss = out(strm, Substring.string ss ^ "\n\n") 15 16fun print_filled_words strm col wlist = 17 case wlist of 18 [] => col 19 | (w::ws) => let 20 val sz = Substring.size w 21 in 22 if col = 0 then 23 (out(strm, Substring.string w); 24 print_filled_words strm sz ws) 25 else if sz + 1 + col > pagewidth then 26 (out(strm, "\n"); print_filled_words strm 0 wlist) 27 else 28 (out(strm, " "); out(strm, Substring.string w); 29 print_filled_words strm (col + sz + 1) ws) 30 end 31 32local 33 val picker = 34 if Systeml.OS = "winNT" then (fn x => fn y => y) 35 else (fn x => fn y => x) 36 val lsquo = picker "\226\128\152" "'" 37 val rsquo = picker "\226\128\153" "'" 38 val ldquo = picker "\226\128\156" "\"" 39 val rdquo = picker "\226\128\157" "\"" 40in 41 42 43fun text_encode ss = let 44 (* passes over a substring, replacing single apostrophes with ��� 45 backquotes with ��� and the "latex" encodings of nice double-quotes: 46 `` with ��� and '' with ��� *) 47 open Substring 48 datatype state = backquote | apostrophe | normal of int * substring 49 fun recurse acc s ss = 50 case (s, getc ss) of 51 (backquote, NONE) => (lsquo :: acc) 52 | (apostrophe, NONE) => (rsquo :: acc) 53 | (normal(n,ss0), NONE) => (string ss0 :: acc) 54 | (normal (n,ss0), SOME(#"'", ss')) => 55 recurse (string (slice(ss0,0,SOME n)) :: acc) apostrophe ss' 56 | (normal(n,ss0), SOME(#"`", ss')) => 57 recurse (string (slice(ss0,0,SOME n))::acc) backquote ss' 58 | (normal(n,ss0), SOME(c, ss')) => recurse acc (normal(n + 1, ss0)) ss' 59 | (apostrophe, SOME(#"'", ss')) => 60 recurse (rdquo :: acc) (normal(0,ss')) ss' 61 | (apostrophe, SOME(#"`", ss')) => 62 recurse (rsquo :: acc) backquote ss' 63 | (apostrophe, SOME _) => 64 recurse (rsquo :: acc) (normal(0,ss)) ss 65 | (backquote, SOME(#"`", ss')) => 66 recurse (ldquo :: acc) (normal(0,ss')) ss' 67 | (backquote, SOME(#"'", ss')) => 68 recurse (lsquo :: acc) apostrophe ss' 69 | (backquote, SOME _) => 70 recurse (lsquo :: acc) (normal(0,ss)) ss 71in 72 String.concat (List.rev (recurse [] (normal(0,ss)) ss)) 73end 74 75end (* local *) 76 77 78fun print_markups strm mlist = 79 case mlist of 80 [] => () 81 | (m::ms) => let 82 in 83 case m of 84 PARA => (out(strm, "\n\n"); print_markups strm ms) 85 | TEXT ss => (out(strm, text_encode ss); 86 print_markups strm ms) 87 | EMPH ss => (out(strm, "*" ^ text_encode ss ^ "*"); 88 print_markups strm ms) 89 | BRKT ss => (out(strm, "{" ^ Substring.string ss ^ "}"); 90 print_markups strm ms) 91 | XMPL ss => (out(strm, Substring.string ss); 92 print_markups strm ms) 93 end 94 95 96fun listify [] = raise Fail "Empty SEEALSO list -- impossible" 97 | listify [x] = [Substring.full (Substring.string x ^ ".")] 98 | listify (x::xs) = Substring.full (Substring.string x ^ ",") :: 99 listify xs 100 101fun print_list strm ssl = print_filled_words strm 0 (listify ssl) 102 103fun ignore_these s = List.exists (fn s' => s' = s) ["DOC", "STRUCTURE"] 104 105fun write_section strm s = 106 case s of 107 TYPE ss => print_type strm ss 108 | FIELD(s, mlist) => 109 if ignore_these s then () 110 else let 111 in 112 out(strm, s ^ "\n"); 113 print_markups strm mlist; 114 out(strm, "\n\n") 115 end 116 | SEEALSO ssl => (out(strm, "SEEALSO\n"); 117 print_list strm ssl; 118 out(strm, "\n\n")) 119 120 121fun print_docpart (slist, ostr) = let 122 fun find_structpart [] = NONE 123 | find_structpart (FIELD("STRUCTURE", [TEXT m])::_) = SOME m 124 | find_structpart (_ :: t) = find_structpart t 125 fun find_docpart [] = raise Fail "Can't happen - empty section list" 126 | find_docpart (FIELD("DOC", [TEXT m]) :: _) = m 127 | find_docpart (_ :: t) = raise Fail "Can't happen \\DOC not first entry" 128 val docpart = Substring.string (find_docpart slist) 129 val prettier = 130 case find_structpart slist of 131 NONE => docpart 132 | SOME ss => docpart ^ 133 (StringCvt.padLeft #" " (pagewidth - String.size docpart) 134 ("(" ^ Substring.string ss ^ ")")) 135in 136 out (ostr, separator ^ "\n"); 137 out (ostr, prettier ^ "\n"); 138 out (ostr, separator ^ "\n") 139end 140 141fun do_one_file docdir destdir dname = let 142 val file = parse_file (OS.Path.concat(docdir, dname ^ ".doc")) 143 val outputstr = TextIO.openOut (OS.Path.concat(destdir, dname ^ ".txt")) 144in 145 print_docpart (file, outputstr); 146 app (write_section outputstr) file; 147 out(outputstr, separator ^"\n"); 148 TextIO.closeOut outputstr 149end handle e => die ("Exception raised: " ^ General.exnMessage e) 150 151 152fun main () = 153 case CommandLine.arguments() of 154 [docdir, destdir] => let 155 val docfiles = find_docfiles docdir 156 open Binaryset 157 val (tick,finish) = 158 Flash.initialise ("Directory "^docdir^": ", numItems docfiles) 159 in 160 app (fn d => (do_one_file docdir destdir d; tick())) docfiles; 161 finish(); 162 OS.Process.exit OS.Process.success 163 end 164 | _ => 165 (warn ("Usage: "^CommandLine.name()^ " <doc directory> "^ 166 "<destination directory>\n"); 167 OS.Process.exit OS.Process.failure); 168 169end (* struct *) 170