1251875Speter/* Title: Pure/General/word.scala 2251875Speter Author: Makarius 3251875Speter 4251875SpeterSupport for words within Unicode text. 5251875Speter*/ 6251875Speter 7251875Speterpackage isabelle 8251875Speter 9251875Speterimport java.text.Bidi 10251875Speterimport java.util.Locale 11251875Speter 12251875Speter 13251875Speterobject Word 14251875Speter{ 15251875Speter /* directionality */ 16251875Speter 17251875Speter def bidi_detect(str: String): Boolean = 18251875Speter str.exists(c => c >= 0x590) && Bidi.requiresBidi(str.toArray, 0, str.length) 19251875Speter 20251875Speter def bidi_override(str: String): String = 21251875Speter if (bidi_detect(str)) "\u200E\u202D" + str + "\u202C" else str 22251875Speter 23251875Speter 24251875Speter /* case */ 25251875Speter 26251875Speter def lowercase(str: String): String = str.toLowerCase(Locale.ROOT) 27251875Speter def uppercase(str: String): String = str.toUpperCase(Locale.ROOT) 28251875Speter 29251875Speter def capitalize(str: String): String = 30251875Speter if (str.length == 0) str 31251875Speter else { 32251875Speter val n = Character.charCount(str.codePointAt(0)) 33251875Speter uppercase(str.substring(0, n)) + lowercase(str.substring(n)) 34251875Speter } 35251875Speter 36251875Speter def perhaps_capitalize(str: String): String = 37251875Speter if (Codepoint.iterator(str).forall(c => Character.isLowerCase(c) || Character.isDigit(c))) 38251875Speter capitalize(str) 39251875Speter else str 40251875Speter 41251875Speter sealed abstract class Case 42251875Speter case object Lowercase extends Case 43251875Speter case object Uppercase extends Case 44251875Speter case object Capitalized extends Case 45251875Speter 46251875Speter object Case 47251875Speter { 48251875Speter def apply(c: Case, str: String): String = 49251875Speter c match { 50251875Speter case Lowercase => lowercase(str) 51251875Speter case Uppercase => uppercase(str) 52251875Speter case Capitalized => capitalize(str) 53251875Speter } 54251875Speter def unapply(str: String): Option[Case] = 55251875Speter if (str.nonEmpty) { 56251875Speter if (Codepoint.iterator(str).forall(Character.isLowerCase(_))) Some(Lowercase) 57251875Speter else if (Codepoint.iterator(str).forall(Character.isUpperCase(_))) Some(Uppercase) 58251875Speter else { 59251875Speter val it = Codepoint.iterator(str) 60251875Speter if (Character.isUpperCase(it.next) && it.forall(Character.isLowerCase(_))) 61251875Speter Some(Capitalized) 62251875Speter else None 63251875Speter } 64251875Speter } 65251875Speter else None 66251875Speter } 67251875Speter 68251875Speter 69251875Speter /* sequence of words */ 70251875Speter 71251875Speter def implode(words: Iterable[String]): String = words.iterator.mkString(" ") 72251875Speter 73251875Speter def explode(sep: Char => Boolean, text: String): List[String] = 74251875Speter Library.separated_chunks(sep, text).map(_.toString).filter(_ != "").toList 75251875Speter 76251875Speter def explode(sep: Char, text: String): List[String] = 77251875Speter explode(_ == sep, text) 78251875Speter 79251875Speter def explode(text: String): List[String] = 80251875Speter explode(Character.isWhitespace(_), text) 81251875Speter 82251875Speter 83251875Speter /* brackets */ 84251875Speter 85251875Speter val open_brackets = "([{�����������������������" 86251875Speter val close_brackets = ")]}�����������������������" 87251875Speter} 88251875Speter