1/*  Title:      Pure/Tools/spell_checker.scala
2    Author:     Makarius
3
4Spell checker with completion, based on JOrtho (see
5https://sourceforge.net/projects/jortho).
6*/
7
8package isabelle
9
10
11import java.lang.Class
12
13import scala.collection.mutable
14import scala.annotation.tailrec
15import scala.collection.immutable.SortedMap
16
17
18object Spell_Checker
19{
20  /* words within text */
21
22  def marked_words(base: Text.Offset, text: String, mark: Text.Info[String] => Boolean)
23    : List[Text.Info[String]] =
24  {
25    val result = new mutable.ListBuffer[Text.Info[String]]
26    var offset = 0
27
28    def apostrophe(c: Int): Boolean =
29      c == '\'' && (offset + 1 == text.length || text(offset + 1) != '\'')
30
31    @tailrec def scan(pred: Int => Boolean)
32    {
33      if (offset < text.length) {
34        val c = text.codePointAt(offset)
35        if (pred(c)) {
36          offset += Character.charCount(c)
37          scan(pred)
38        }
39      }
40    }
41
42    while (offset < text.length) {
43      scan(c => !Character.isLetter(c))
44      val start = offset
45      scan(c => Character.isLetterOrDigit(c) || apostrophe(c))
46      val stop = offset
47      if (stop - start >= 2) {
48        val info = Text.Info(Text.Range(base + start, base + stop), text.substring(start, stop))
49        if (mark(info)) result += info
50      }
51    }
52    result.toList
53  }
54
55  def current_word(rendering: Rendering, range: Text.Range): Option[Text.Info[String]] =
56  {
57    for {
58      spell_range <- rendering.spell_checker_point(range)
59      text <- rendering.model.get_text(spell_range)
60      info <- marked_words(spell_range.start, text, info => info.range.overlaps(range)).headOption
61    } yield info
62  }
63
64
65  /* dictionaries */
66
67  class Dictionary private[Spell_Checker](val path: Path)
68  {
69    val lang = path.drop_ext.file_name
70    val user_path = Path.explode("$ISABELLE_HOME_USER/dictionaries") + Path.basic(lang)
71    override def toString: String = lang
72  }
73
74  private object Decl
75  {
76    def apply(name: String, include: Boolean): String =
77      if (include) name else "-" + name
78
79    def unapply(decl: String): Option[(String, Boolean)] =
80    {
81      val decl1 = decl.trim
82      if (decl1 == "" || decl1.startsWith("#")) None
83      else
84        Library.try_unprefix("-", decl1.trim) match {
85          case None => Some((decl1, true))
86          case Some(decl2) => Some((decl2, false))
87        }
88    }
89  }
90
91  def dictionaries(): List[Dictionary] =
92    for {
93      path <- Path.split(Isabelle_System.getenv("JORTHO_DICTIONARIES"))
94      if path.is_file
95    } yield new Dictionary(path)
96
97
98  /* create spell checker */
99
100  def apply(dictionary: Dictionary): Spell_Checker = new Spell_Checker(dictionary)
101
102  private sealed case class Update(include: Boolean, permanent: Boolean)
103}
104
105
106class Spell_Checker private(dictionary: Spell_Checker.Dictionary)
107{
108  override def toString: String = dictionary.toString
109
110
111  /* main dictionary content */
112
113  private var dict = new Object
114  private var updates = SortedMap.empty[String, Spell_Checker.Update]
115
116  private def included_iterator(): Iterator[String] =
117    for {
118      (word, upd) <- updates.iterator
119      if upd.include
120    } yield word
121
122  private def excluded(word: String): Boolean =
123    updates.get(word) match {
124      case Some(upd) => !upd.include
125      case None => false
126    }
127
128  private def load()
129  {
130    val main_dictionary = split_lines(File.read_gzip(dictionary.path))
131
132    val permanent_updates =
133      if (dictionary.user_path.is_file)
134        for {
135          Spell_Checker.Decl(word, include) <- split_lines(File.read(dictionary.user_path))
136        } yield (word, Spell_Checker.Update(include, true))
137      else Nil
138
139    updates =
140      updates -- (for ((name, upd) <- updates.iterator; if upd.permanent) yield name) ++
141        permanent_updates
142
143    val factory_class = Class.forName("com.inet.jortho.DictionaryFactory")
144    val factory_cons = factory_class.getConstructor()
145    factory_cons.setAccessible(true)
146    val factory = factory_cons.newInstance()
147
148    val add = Untyped.method(factory_class, "add", classOf[String])
149
150    for {
151      word <- main_dictionary.iterator ++ included_iterator()
152      if !excluded(word)
153    } add.invoke(factory, word)
154
155    dict = Untyped.method(factory_class, "create").invoke(factory)
156  }
157  load()
158
159  private def save()
160  {
161    val permanent_decls =
162      (for {
163        (word, upd) <- updates.iterator
164        if upd.permanent
165      } yield Spell_Checker.Decl(word, upd.include)).toList
166
167    if (permanent_decls.nonEmpty || dictionary.user_path.is_file) {
168      val header = """# User updates for spell-checker dictionary
169#
170#   * each line contains at most one word
171#   * extra blanks are ignored
172#   * lines starting with "#" are stripped
173#   * lines starting with "-" indicate excluded words
174#
175#:mode=text:encoding=UTF-8:
176
177"""
178      Isabelle_System.mkdirs(dictionary.user_path.expand.dir)
179      File.write(dictionary.user_path, header + cat_lines(permanent_decls))
180    }
181  }
182
183  def update(word: String, include: Boolean, permanent: Boolean)
184  {
185    updates += (word -> Spell_Checker.Update(include, permanent))
186
187    if (include) {
188      if (permanent) save()
189      Untyped.method(dict.getClass, "add", classOf[String]).invoke(dict, word)
190    }
191    else { save(); load() }
192  }
193
194  def reset()
195  {
196    updates = SortedMap.empty
197    load()
198  }
199
200  def reset_enabled(): Int =
201    updates.valuesIterator.count(upd => !upd.permanent)
202
203
204  /* check known words */
205
206  def contains(word: String): Boolean =
207    Untyped.method(dict.getClass.getSuperclass, "exist", classOf[String]).
208      invoke(dict, word).asInstanceOf[java.lang.Boolean].booleanValue
209
210  def check(word: String): Boolean =
211    word match {
212      case Word.Case(c) if c != Word.Lowercase =>
213        contains(word) || contains(Word.lowercase(word))
214      case _ =>
215        contains(word)
216    }
217
218  def marked_words(base: Text.Offset, text: String): List[Text.Info[String]] =
219    Spell_Checker.marked_words(base, text, info => !check(info.info))
220
221
222  /* completion: suggestions for unknown words */
223
224  private def suggestions(word: String): Option[List[String]] =
225  {
226    val res =
227      Untyped.method(dict.getClass.getSuperclass, "searchSuggestions", classOf[String]).
228        invoke(dict, word).asInstanceOf[java.util.List[AnyRef]].toArray.toList.map(_.toString)
229    if (res.isEmpty) None else Some(res)
230  }
231
232  def complete(word: String): List[String] =
233    if (check(word)) Nil
234    else {
235      val word_case = Word.Case.unapply(word)
236      def recover_case(s: String) =
237        word_case match {
238          case Some(c) => Word.Case(c, s)
239          case None => s
240        }
241      val result =
242        word_case match {
243          case Some(c) if c != Word.Lowercase =>
244            suggestions(word) orElse suggestions(Word.lowercase(word))
245          case _ =>
246            suggestions(word)
247        }
248      result.getOrElse(Nil).map(recover_case)
249    }
250
251  def completion(rendering: Rendering, caret: Text.Offset): Option[Completion.Result] =
252  {
253    val caret_range = rendering.before_caret_range(caret)
254    for {
255      word <- Spell_Checker.current_word(rendering, caret_range)
256      words = complete(word.info)
257      if words.nonEmpty
258      descr = "(from dictionary " + quote(dictionary.toString) + ")"
259      items =
260        words.map(w => Completion.Item(word.range, word.info, "", List(w, descr), w, 0, false))
261    } yield Completion.Result(word.range, word.info, false, items)
262  }
263}
264
265class Spell_Checker_Variable
266{
267  private val no_spell_checker: (String, Option[Spell_Checker]) = ("", None)
268  private var current_spell_checker = no_spell_checker
269
270  def get: Option[Spell_Checker] = synchronized { current_spell_checker._2 }
271
272  def update(options: Options): Unit = synchronized {
273    if (options.bool("spell_checker")) {
274      val lang = options.string("spell_checker_dictionary")
275      if (current_spell_checker._1 != lang) {
276        Spell_Checker.dictionaries.find(_.lang == lang) match {
277          case Some(dictionary) =>
278            val spell_checker =
279              Exn.capture { Spell_Checker(dictionary) } match {
280                case Exn.Res(spell_checker) => Some(spell_checker)
281                case Exn.Exn(_) => None
282              }
283            current_spell_checker = (lang, spell_checker)
284          case None =>
285            current_spell_checker = no_spell_checker
286        }
287      }
288    }
289    else current_spell_checker = no_spell_checker
290  }
291}
292