1/* Title: Pure/Tools/spell_checker.scala 2 Author: Makarius 3 4Spell checker with completion, based on JOrtho (see 5https://sourceforge.net/projects/jortho). 6*/ 7 8package isabelle 9 10 11import java.lang.Class 12 13import scala.collection.mutable 14import scala.annotation.tailrec 15import scala.collection.immutable.SortedMap 16 17 18object Spell_Checker 19{ 20 /* words within text */ 21 22 def marked_words(base: Text.Offset, text: String, mark: Text.Info[String] => Boolean) 23 : List[Text.Info[String]] = 24 { 25 val result = new mutable.ListBuffer[Text.Info[String]] 26 var offset = 0 27 28 def apostrophe(c: Int): Boolean = 29 c == '\'' && (offset + 1 == text.length || text(offset + 1) != '\'') 30 31 @tailrec def scan(pred: Int => Boolean) 32 { 33 if (offset < text.length) { 34 val c = text.codePointAt(offset) 35 if (pred(c)) { 36 offset += Character.charCount(c) 37 scan(pred) 38 } 39 } 40 } 41 42 while (offset < text.length) { 43 scan(c => !Character.isLetter(c)) 44 val start = offset 45 scan(c => Character.isLetterOrDigit(c) || apostrophe(c)) 46 val stop = offset 47 if (stop - start >= 2) { 48 val info = Text.Info(Text.Range(base + start, base + stop), text.substring(start, stop)) 49 if (mark(info)) result += info 50 } 51 } 52 result.toList 53 } 54 55 def current_word(rendering: Rendering, range: Text.Range): Option[Text.Info[String]] = 56 { 57 for { 58 spell_range <- rendering.spell_checker_point(range) 59 text <- rendering.model.get_text(spell_range) 60 info <- marked_words(spell_range.start, text, info => info.range.overlaps(range)).headOption 61 } yield info 62 } 63 64 65 /* dictionaries */ 66 67 class Dictionary private[Spell_Checker](val path: Path) 68 { 69 val lang = path.drop_ext.file_name 70 val user_path = Path.explode("$ISABELLE_HOME_USER/dictionaries") + Path.basic(lang) 71 override def toString: String = lang 72 } 73 74 private object Decl 75 { 76 def apply(name: String, include: Boolean): String = 77 if (include) name else "-" + name 78 79 def unapply(decl: String): Option[(String, Boolean)] = 80 { 81 val decl1 = decl.trim 82 if (decl1 == "" || decl1.startsWith("#")) None 83 else 84 Library.try_unprefix("-", decl1.trim) match { 85 case None => Some((decl1, true)) 86 case Some(decl2) => Some((decl2, false)) 87 } 88 } 89 } 90 91 def dictionaries(): List[Dictionary] = 92 for { 93 path <- Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES")) 94 if path.is_file 95 } yield new Dictionary(path) 96 97 98 /* create spell checker */ 99 100 def apply(dictionary: Dictionary): Spell_Checker = new Spell_Checker(dictionary) 101 102 private sealed case class Update(include: Boolean, permanent: Boolean) 103} 104 105 106class Spell_Checker private(dictionary: Spell_Checker.Dictionary) 107{ 108 override def toString: String = dictionary.toString 109 110 111 /* main dictionary content */ 112 113 private var dict = new Object 114 private var updates = SortedMap.empty[String, Spell_Checker.Update] 115 116 private def included_iterator(): Iterator[String] = 117 for { 118 (word, upd) <- updates.iterator 119 if upd.include 120 } yield word 121 122 private def excluded(word: String): Boolean = 123 updates.get(word) match { 124 case Some(upd) => !upd.include 125 case None => false 126 } 127 128 private def load() 129 { 130 val main_dictionary = split_lines(File.read_gzip(dictionary.path)) 131 132 val permanent_updates = 133 if (dictionary.user_path.is_file) 134 for { 135 Spell_Checker.Decl(word, include) <- split_lines(File.read(dictionary.user_path)) 136 } yield (word, Spell_Checker.Update(include, true)) 137 else Nil 138 139 updates = 140 updates -- (for ((name, upd) <- updates.iterator; if upd.permanent) yield name) ++ 141 permanent_updates 142 143 val factory_class = Class.forName("com.inet.jortho.DictionaryFactory") 144 val factory_cons = factory_class.getConstructor() 145 factory_cons.setAccessible(true) 146 val factory = factory_cons.newInstance() 147 148 val add = Untyped.method(factory_class, "add", classOf[String]) 149 150 for { 151 word <- main_dictionary.iterator ++ included_iterator() 152 if !excluded(word) 153 } add.invoke(factory, word) 154 155 dict = Untyped.method(factory_class, "create").invoke(factory) 156 } 157 load() 158 159 private def save() 160 { 161 val permanent_decls = 162 (for { 163 (word, upd) <- updates.iterator 164 if upd.permanent 165 } yield Spell_Checker.Decl(word, upd.include)).toList 166 167 if (permanent_decls.nonEmpty || dictionary.user_path.is_file) { 168 val header = """# User updates for spell-checker dictionary 169# 170# * each line contains at most one word 171# * extra blanks are ignored 172# * lines starting with "#" are stripped 173# * lines starting with "-" indicate excluded words 174# 175#:mode=text:encoding=UTF-8: 176 177""" 178 Isabelle_System.mkdirs(dictionary.user_path.expand.dir) 179 File.write(dictionary.user_path, header + cat_lines(permanent_decls)) 180 } 181 } 182 183 def update(word: String, include: Boolean, permanent: Boolean) 184 { 185 updates += (word -> Spell_Checker.Update(include, permanent)) 186 187 if (include) { 188 if (permanent) save() 189 Untyped.method(dict.getClass, "add", classOf[String]).invoke(dict, word) 190 } 191 else { save(); load() } 192 } 193 194 def reset() 195 { 196 updates = SortedMap.empty 197 load() 198 } 199 200 def reset_enabled(): Int = 201 updates.valuesIterator.count(upd => !upd.permanent) 202 203 204 /* check known words */ 205 206 def contains(word: String): Boolean = 207 Untyped.method(dict.getClass.getSuperclass, "exist", classOf[String]). 208 invoke(dict, word).asInstanceOf[java.lang.Boolean].booleanValue 209 210 def check(word: String): Boolean = 211 word match { 212 case Word.Case(c) if c != Word.Lowercase => 213 contains(word) || contains(Word.lowercase(word)) 214 case _ => 215 contains(word) 216 } 217 218 def marked_words(base: Text.Offset, text: String): List[Text.Info[String]] = 219 Spell_Checker.marked_words(base, text, info => !check(info.info)) 220 221 222 /* completion: suggestions for unknown words */ 223 224 private def suggestions(word: String): Option[List[String]] = 225 { 226 val res = 227 Untyped.method(dict.getClass.getSuperclass, "searchSuggestions", classOf[String]). 228 invoke(dict, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString) 229 if (res.isEmpty) None else Some(res) 230 } 231 232 def complete(word: String): List[String] = 233 if (check(word)) Nil 234 else { 235 val word_case = Word.Case.unapply(word) 236 def recover_case(s: String) = 237 word_case match { 238 case Some(c) => Word.Case(c, s) 239 case None => s 240 } 241 val result = 242 word_case match { 243 case Some(c) if c != Word.Lowercase => 244 suggestions(word) orElse suggestions(Word.lowercase(word)) 245 case _ => 246 suggestions(word) 247 } 248 result.getOrElse(Nil).map(recover_case) 249 } 250 251 def completion(rendering: Rendering, caret: Text.Offset): Option[Completion.Result] = 252 { 253 val caret_range = rendering.before_caret_range(caret) 254 for { 255 word <- Spell_Checker.current_word(rendering, caret_range) 256 words = complete(word.info) 257 if words.nonEmpty 258 descr = "(from dictionary " + quote(dictionary.toString) + ")" 259 items = 260 words.map(w => Completion.Item(word.range, word.info, "", List(w, descr), w, 0, false)) 261 } yield Completion.Result(word.range, word.info, false, items) 262 } 263} 264 265class Spell_Checker_Variable 266{ 267 private val no_spell_checker: (String, Option[Spell_Checker]) = ("", None) 268 private var current_spell_checker = no_spell_checker 269 270 def get: Option[Spell_Checker] = synchronized { current_spell_checker._2 } 271 272 def update(options: Options): Unit = synchronized { 273 if (options.bool("spell_checker")) { 274 val lang = options.string("spell_checker_dictionary") 275 if (current_spell_checker._1 != lang) { 276 Spell_Checker.dictionaries.find(_.lang == lang) match { 277 case Some(dictionary) => 278 val spell_checker = 279 Exn.capture { Spell_Checker(dictionary) } match { 280 case Exn.Res(spell_checker) => Some(spell_checker) 281 case Exn.Exn(_) => None 282 } 283 current_spell_checker = (lang, spell_checker) 284 case None => 285 current_spell_checker = no_spell_checker 286 } 287 } 288 } 289 else current_spell_checker = no_spell_checker 290 } 291} 292