1/* Title: Pure/PIDE/xml.scala 2 Author: Makarius 3 4Untyped XML trees and basic data representation. 5*/ 6 7package isabelle 8 9 10object XML 11{ 12 /** XML trees **/ 13 14 /* datatype representation */ 15 16 type Attribute = Properties.Entry 17 type Attributes = Properties.T 18 19 sealed abstract class Tree { override def toString: String = string_of_tree(this) } 20 type Body = List[Tree] 21 case class Elem(markup: Markup, body: Body) extends Tree 22 { 23 def name: String = markup.name 24 25 def update_attributes(more_attributes: Attributes): Elem = 26 if (more_attributes.isEmpty) this 27 else Elem(markup.update_properties(more_attributes), body) 28 29 def + (att: Attribute): Elem = Elem(markup + att, body) 30 } 31 case class Text(content: String) extends Tree 32 33 def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil) 34 def elem(name: String, body: Body): XML.Elem = XML.Elem(Markup(name, Nil), body) 35 def elem(name: String): XML.Elem = XML.Elem(Markup(name, Nil), Nil) 36 37 val newline: Text = Text("\n") 38 39 40 /* name space */ 41 42 object Namespace 43 { 44 def apply(prefix: String, target: String): Namespace = 45 new Namespace(prefix, target) 46 } 47 48 final class Namespace private(prefix: String, target: String) 49 { 50 def apply(name: String): String = prefix + ":" + name 51 val attribute: XML.Attribute = ("xmlns:" + prefix, target) 52 53 override def toString: String = attribute.toString 54 } 55 56 57 /* wrapped elements */ 58 59 val XML_ELEM = "xml_elem" 60 val XML_NAME = "xml_name" 61 val XML_BODY = "xml_body" 62 63 object Wrapped_Elem 64 { 65 def apply(markup: Markup, body1: Body, body2: Body): XML.Elem = 66 XML.Elem(Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties), 67 XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) 68 69 def unapply(tree: Tree): Option[(Markup, Body, Body)] = 70 tree match { 71 case 72 XML.Elem(Markup(XML_ELEM, (XML_NAME, name) :: props), 73 XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) => 74 Some(Markup(name, props), body1, body2) 75 case _ => None 76 } 77 } 78 79 object Root_Elem 80 { 81 def apply(body: Body): XML.Elem = XML.Elem(Markup(XML_ELEM, Nil), body) 82 def unapply(tree: Tree): Option[Body] = 83 tree match { 84 case XML.Elem(Markup(XML_ELEM, Nil), body) => Some(body) 85 case _ => None 86 } 87 } 88 89 90 /* traverse text */ 91 92 def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A = 93 { 94 def traverse(x: A, t: Tree): A = 95 t match { 96 case XML.Wrapped_Elem(_, _, ts) => (x /: ts)(traverse) 97 case XML.Elem(_, ts) => (x /: ts)(traverse) 98 case XML.Text(s) => op(x, s) 99 } 100 (a /: body)(traverse) 101 } 102 103 def text_length(body: Body): Int = traverse_text(body)(0) { case (n, s) => n + s.length } 104 105 106 /* text content */ 107 108 def content(body: Body): String = 109 { 110 val text = new StringBuilder(text_length(body)) 111 traverse_text(body)(()) { case (_, s) => text.append(s) } 112 text.toString 113 } 114 115 def content(tree: Tree): String = content(List(tree)) 116 117 118 119 /** string representation **/ 120 121 val header: String = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" 122 123 def output_char(c: Char, s: StringBuilder) 124 { 125 c match { 126 case '<' => s ++= "<" 127 case '>' => s ++= ">" 128 case '&' => s ++= "&" 129 case '"' => s ++= """ 130 case '\'' => s ++= "'" 131 case _ => s += c 132 } 133 } 134 135 def output_string(str: String, s: StringBuilder) 136 { 137 if (str == null) s ++= str 138 else str.iterator.foreach(c => output_char(c, s)) 139 } 140 141 def string_of_body(body: Body): String = 142 { 143 val s = new StringBuilder 144 145 def text(txt: String) { output_string(txt, s) } 146 def elem(markup: Markup) 147 { 148 s ++= markup.name 149 for ((a, b) <- markup.properties) { 150 s += ' '; s ++= a; s += '='; s += '"'; text(b); s += '"' 151 } 152 } 153 def tree(t: Tree): Unit = 154 t match { 155 case XML.Elem(markup, Nil) => 156 s += '<'; elem(markup); s ++= "/>" 157 case XML.Elem(markup, ts) => 158 s += '<'; elem(markup); s += '>' 159 ts.foreach(tree) 160 s ++= "</"; s ++= markup.name; s += '>' 161 case XML.Text(txt) => text(txt) 162 } 163 body.foreach(tree) 164 s.toString 165 } 166 167 def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) 168 169 170 171 /** cache **/ 172 173 def make_cache(initial_size: Int = 131071, max_string: Int = 100): Cache = 174 new Cache(initial_size, max_string) 175 176 class Cache private[XML](initial_size: Int, max_string: Int) 177 extends isabelle.Cache(initial_size, max_string) 178 { 179 protected def cache_props(x: Properties.T): Properties.T = 180 { 181 if (x.isEmpty) x 182 else 183 lookup(x) match { 184 case Some(y) => y 185 case None => store(x.map(p => (Library.isolate_substring(p._1).intern, cache_string(p._2)))) 186 } 187 } 188 189 protected def cache_markup(x: Markup): Markup = 190 { 191 lookup(x) match { 192 case Some(y) => y 193 case None => 194 x match { 195 case Markup(name, props) => 196 store(Markup(cache_string(name), cache_props(props))) 197 } 198 } 199 } 200 201 protected def cache_tree(x: XML.Tree): XML.Tree = 202 { 203 lookup(x) match { 204 case Some(y) => y 205 case None => 206 x match { 207 case XML.Elem(markup, body) => 208 store(XML.Elem(cache_markup(markup), cache_body(body))) 209 case XML.Text(text) => store(XML.Text(cache_string(text))) 210 } 211 } 212 } 213 214 protected def cache_body(x: XML.Body): XML.Body = 215 { 216 if (x.isEmpty) x 217 else 218 lookup(x) match { 219 case Some(y) => y 220 case None => x.map(cache_tree(_)) 221 } 222 } 223 224 // main methods 225 def props(x: Properties.T): Properties.T = synchronized { cache_props(x) } 226 def markup(x: Markup): Markup = synchronized { cache_markup(x) } 227 def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) } 228 def body(x: XML.Body): XML.Body = synchronized { cache_body(x) } 229 def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] } 230 } 231 232 233 234 /** XML as data representation language **/ 235 236 abstract class Error(s: String) extends Exception(s) 237 class XML_Atom(s: String) extends Error(s) 238 class XML_Body(body: XML.Body) extends Error("") 239 240 object Encode 241 { 242 type T[A] = A => XML.Body 243 type V[A] = PartialFunction[A, (List[String], XML.Body)] 244 type P[A] = PartialFunction[A, List[String]] 245 246 247 /* atomic values */ 248 249 def long_atom(i: Long): String = Library.signed_string_of_long(i) 250 251 def int_atom(i: Int): String = Library.signed_string_of_int(i) 252 253 def bool_atom(b: Boolean): String = if (b) "1" else "0" 254 255 def unit_atom(u: Unit) = "" 256 257 258 /* structural nodes */ 259 260 private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts) 261 262 private def vector(xs: List[String]): XML.Attributes = 263 xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) }) 264 265 private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = 266 XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2) 267 268 269 /* representation of standard types */ 270 271 val tree: T[XML.Tree] = (t => List(t)) 272 273 val properties: T[Properties.T] = 274 (props => List(XML.Elem(Markup(":", props), Nil))) 275 276 val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s))) 277 278 val long: T[Long] = (x => string(long_atom(x))) 279 280 val int: T[Int] = (x => string(int_atom(x))) 281 282 val bool: T[Boolean] = (x => string(bool_atom(x))) 283 284 val unit: T[Unit] = (x => string(unit_atom(x))) 285 286 def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = 287 (x => List(node(f(x._1)), node(g(x._2)))) 288 289 def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = 290 (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3)))) 291 292 def list[A](f: T[A]): T[List[A]] = 293 (xs => xs.map((x: A) => node(f(x)))) 294 295 def option[A](f: T[A]): T[Option[A]] = 296 { 297 case None => Nil 298 case Some(x) => List(node(f(x))) 299 } 300 301 def variant[A](fs: List[V[A]]): T[A] = 302 { 303 case x => 304 val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get 305 List(tagged(tag, f(x))) 306 } 307 } 308 309 object Decode 310 { 311 type T[A] = XML.Body => A 312 type V[A] = (List[String], XML.Body) => A 313 type P[A] = PartialFunction[List[String], A] 314 315 316 /* atomic values */ 317 318 def long_atom(s: String): Long = 319 try { java.lang.Long.parseLong(s) } 320 catch { case e: NumberFormatException => throw new XML_Atom(s) } 321 322 def int_atom(s: String): Int = 323 try { Integer.parseInt(s) } 324 catch { case e: NumberFormatException => throw new XML_Atom(s) } 325 326 def bool_atom(s: String): Boolean = 327 if (s == "1") true 328 else if (s == "0") false 329 else throw new XML_Atom(s) 330 331 def unit_atom(s: String): Unit = 332 if (s == "") () else throw new XML_Atom(s) 333 334 335 /* structural nodes */ 336 337 private def node(t: XML.Tree): XML.Body = 338 t match { 339 case XML.Elem(Markup(":", Nil), ts) => ts 340 case _ => throw new XML_Body(List(t)) 341 } 342 343 private def vector(atts: XML.Attributes): List[String] = 344 atts.iterator.zipWithIndex.map( 345 { case ((a, x), i) => if (int_atom(a) == i) x else throw new XML_Atom(a) }).toList 346 347 private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) = 348 t match { 349 case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts)) 350 case _ => throw new XML_Body(List(t)) 351 } 352 353 354 /* representation of standard types */ 355 356 val tree: T[XML.Tree] = 357 { 358 case List(t) => t 359 case ts => throw new XML_Body(ts) 360 } 361 362 val properties: T[Properties.T] = 363 { 364 case List(XML.Elem(Markup(":", props), Nil)) => props 365 case ts => throw new XML_Body(ts) 366 } 367 368 val string: T[String] = 369 { 370 case Nil => "" 371 case List(XML.Text(s)) => s 372 case ts => throw new XML_Body(ts) 373 } 374 375 val long: T[Long] = (x => long_atom(string(x))) 376 377 val int: T[Int] = (x => int_atom(string(x))) 378 379 val bool: T[Boolean] = (x => bool_atom(string(x))) 380 381 val unit: T[Unit] = (x => unit_atom(string(x))) 382 383 def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = 384 { 385 case List(t1, t2) => (f(node(t1)), g(node(t2))) 386 case ts => throw new XML_Body(ts) 387 } 388 389 def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = 390 { 391 case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3))) 392 case ts => throw new XML_Body(ts) 393 } 394 395 def list[A](f: T[A]): T[List[A]] = 396 (ts => ts.map(t => f(node(t)))) 397 398 def option[A](f: T[A]): T[Option[A]] = 399 { 400 case Nil => None 401 case List(t) => Some(f(node(t))) 402 case ts => throw new XML_Body(ts) 403 } 404 405 def variant[A](fs: List[V[A]]): T[A] = 406 { 407 case List(t) => 408 val (tag, (xs, ts)) = tagged(t) 409 val f = 410 try { fs(tag) } 411 catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) } 412 f(xs, ts) 413 case ts => throw new XML_Body(ts) 414 } 415 } 416} 417