121308Sache/* Title: Pure/General/word.scala 221308Sache Author: Makarius 321308Sache 421308SacheSupport for words within Unicode text. 521308Sache*/ 621308Sache 721308Sachepackage isabelle 821308Sache 921308Sacheimport java.text.Bidi 1021308Sacheimport java.util.Locale 1121308Sache 1221308Sache 1321308Sacheobject Word 1421308Sache{ 1558310Sache /* directionality */ 1621308Sache 1721308Sache def bidi_detect(str: String): Boolean = 1821308Sache str.exists(c => c >= 0x590) && Bidi.requiresBidi(str.toArray, 0, str.length) 1921308Sache 2021308Sache def bidi_override(str: String): String = 2121308Sache if (bidi_detect(str)) "\u200E\u202D" + str + "\u202C" else str 2221308Sache 2321308Sache 2421308Sache /* case */ 2521308Sache 2658310Sache def lowercase(str: String): String = str.toLowerCase(Locale.ROOT) 2721308Sache def uppercase(str: String): String = str.toUpperCase(Locale.ROOT) 2821308Sache 2921308Sache def capitalize(str: String): String = 3021308Sache if (str.length == 0) str 3121308Sache else { 3221308Sache val n = Character.charCount(str.codePointAt(0)) 3326497Sache uppercase(str.substring(0, n)) + lowercase(str.substring(n)) 3426497Sache } 3521308Sache 3621308Sache def perhaps_capitalize(str: String): String = 3721308Sache if (Codepoint.iterator(str).forall(c => Character.isLowerCase(c) || Character.isDigit(c))) 3821308Sache capitalize(str) 3921308Sache else str 4021308Sache 4126497Sache sealed abstract class Case 4226497Sache case object Lowercase extends Case 4326497Sache case object Uppercase extends Case 4426497Sache case object Capitalized extends Case 4526497Sache 4621308Sache object Case 4721308Sache { 4821308Sache def apply(c: Case, str: String): String = 4921308Sache c match { 5021308Sache case Lowercase => lowercase(str) 5158310Sache case Uppercase => uppercase(str) 5258310Sache case Capitalized => capitalize(str) 5358310Sache } 5447558Sache def unapply(str: String): Option[Case] = 5547558Sache if (str.nonEmpty) { 5647558Sache if (Codepoint.iterator(str).forall(Character.isLowerCase(_))) Some(Lowercase) 5721308Sache else if (Codepoint.iterator(str).forall(Character.isUpperCase(_))) Some(Uppercase) 5821308Sache else { 5921308Sache val it = Codepoint.iterator(str) 6058310Sache if (Character.isUpperCase(it.next) && it.forall(Character.isLowerCase(_))) 6158310Sache Some(Capitalized) 6221308Sache else None 6321308Sache } 6421308Sache } 6521308Sache else None 6621308Sache } 6721308Sache 6821308Sache 6921308Sache /* sequence of words */ 7021308Sache 7121308Sache def implode(words: Iterable[String]): String = words.iterator.mkString(" ") 7221308Sache 7321308Sache def explode(sep: Char => Boolean, text: String): List[String] = 7421308Sache Library.separated_chunks(sep, text).map(_.toString).filter(_ != "").toList 7521308Sache 7621308Sache def explode(sep: Char, text: String): List[String] = 7721308Sache explode(_ == sep, text) 7821308Sache 7921308Sache def explode(text: String): List[String] = 8021308Sache explode(Character.isWhitespace(_), text) 8121308Sache 8221308Sache 8321308Sache /* brackets */ 8421308Sache 8521308Sache val open_brackets = "([{�����������������������" 8621308Sache val close_brackets = ")]}�����������������������" 8721308Sache} 8821308Sache