1/*  Title:      Pure/PIDE/xml.scala
2    Author:     Makarius
3
4Untyped XML trees and basic data representation.
5*/
6
7package isabelle
8
9
10object XML
11{
12  /** XML trees **/
13
14  /* datatype representation */
15
16  type Attribute = Properties.Entry
17  type Attributes = Properties.T
18
19  sealed abstract class Tree { override def toString: String = string_of_tree(this) }
20  type Body = List[Tree]
21  case class Elem(markup: Markup, body: Body) extends Tree
22  {
23    def name: String = markup.name
24
25    def update_attributes(more_attributes: Attributes): Elem =
26      if (more_attributes.isEmpty) this
27      else Elem(markup.update_properties(more_attributes), body)
28
29    def + (att: Attribute): Elem = Elem(markup + att, body)
30  }
31  case class Text(content: String) extends Tree
32
33  def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil)
34  def elem(name: String, body: Body): XML.Elem = XML.Elem(Markup(name, Nil), body)
35  def elem(name: String): XML.Elem = XML.Elem(Markup(name, Nil), Nil)
36
37  val newline: Text = Text("\n")
38
39
40  /* name space */
41
42  object Namespace
43  {
44    def apply(prefix: String, target: String): Namespace =
45      new Namespace(prefix, target)
46  }
47
48  final class Namespace private(prefix: String, target: String)
49  {
50    def apply(name: String): String = prefix + ":" + name
51    val attribute: XML.Attribute = ("xmlns:" + prefix, target)
52
53    override def toString: String = attribute.toString
54  }
55
56
57  /* wrapped elements */
58
59  val XML_ELEM = "xml_elem"
60  val XML_NAME = "xml_name"
61  val XML_BODY = "xml_body"
62
63  object Wrapped_Elem
64  {
65    def apply(markup: Markup, body1: Body, body2: Body): XML.Elem =
66      XML.Elem(Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties),
67        XML.Elem(Markup(XML_BODY, Nil), body1) :: body2)
68
69    def unapply(tree: Tree): Option[(Markup, Body, Body)] =
70      tree match {
71        case
72          XML.Elem(Markup(XML_ELEM, (XML_NAME, name) :: props),
73            XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
74          Some(Markup(name, props), body1, body2)
75        case _ => None
76      }
77  }
78
79  object Root_Elem
80  {
81    def apply(body: Body): XML.Elem = XML.Elem(Markup(XML_ELEM, Nil), body)
82    def unapply(tree: Tree): Option[Body] =
83      tree match {
84        case XML.Elem(Markup(XML_ELEM, Nil), body) => Some(body)
85        case _ => None
86      }
87  }
88
89
90  /* traverse text */
91
92  def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A =
93  {
94    def traverse(x: A, t: Tree): A =
95      t match {
96        case XML.Wrapped_Elem(_, _, ts) => (x /: ts)(traverse)
97        case XML.Elem(_, ts) => (x /: ts)(traverse)
98        case XML.Text(s) => op(x, s)
99      }
100    (a /: body)(traverse)
101  }
102
103  def text_length(body: Body): Int = traverse_text(body)(0) { case (n, s) => n + s.length }
104
105
106  /* text content */
107
108  def content(body: Body): String =
109  {
110    val text = new StringBuilder(text_length(body))
111    traverse_text(body)(()) { case (_, s) => text.append(s) }
112    text.toString
113  }
114
115  def content(tree: Tree): String = content(List(tree))
116
117
118
119  /** string representation **/
120
121  val header: String = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
122
123  def output_char(c: Char, s: StringBuilder)
124  {
125    c match {
126      case '<' => s ++= "&lt;"
127      case '>' => s ++= "&gt;"
128      case '&' => s ++= "&amp;"
129      case '"' => s ++= "&quot;"
130      case '\'' => s ++= "&apos;"
131      case _ => s += c
132    }
133  }
134
135  def output_string(str: String, s: StringBuilder)
136  {
137    if (str == null) s ++= str
138    else str.iterator.foreach(c => output_char(c, s))
139  }
140
141  def string_of_body(body: Body): String =
142  {
143    val s = new StringBuilder
144
145    def text(txt: String) { output_string(txt, s) }
146    def elem(markup: Markup)
147    {
148      s ++= markup.name
149      for ((a, b) <- markup.properties) {
150        s += ' '; s ++= a; s += '='; s += '"'; text(b); s += '"'
151      }
152    }
153    def tree(t: Tree): Unit =
154      t match {
155        case XML.Elem(markup, Nil) =>
156          s += '<'; elem(markup); s ++= "/>"
157        case XML.Elem(markup, ts) =>
158          s += '<'; elem(markup); s += '>'
159          ts.foreach(tree)
160          s ++= "</"; s ++= markup.name; s += '>'
161        case XML.Text(txt) => text(txt)
162      }
163    body.foreach(tree)
164    s.toString
165  }
166
167  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
168
169
170
171  /** cache **/
172
173  def make_cache(initial_size: Int = 131071, max_string: Int = 100): Cache =
174    new Cache(initial_size, max_string)
175
176  class Cache private[XML](initial_size: Int, max_string: Int)
177    extends isabelle.Cache(initial_size, max_string)
178  {
179    protected def cache_props(x: Properties.T): Properties.T =
180    {
181      if (x.isEmpty) x
182      else
183        lookup(x) match {
184          case Some(y) => y
185          case None => store(x.map(p => (Library.isolate_substring(p._1).intern, cache_string(p._2))))
186        }
187    }
188
189    protected def cache_markup(x: Markup): Markup =
190    {
191      lookup(x) match {
192        case Some(y) => y
193        case None =>
194          x match {
195            case Markup(name, props) =>
196              store(Markup(cache_string(name), cache_props(props)))
197          }
198      }
199    }
200
201    protected def cache_tree(x: XML.Tree): XML.Tree =
202    {
203      lookup(x) match {
204        case Some(y) => y
205        case None =>
206          x match {
207            case XML.Elem(markup, body) =>
208              store(XML.Elem(cache_markup(markup), cache_body(body)))
209            case XML.Text(text) => store(XML.Text(cache_string(text)))
210          }
211      }
212    }
213
214    protected def cache_body(x: XML.Body): XML.Body =
215    {
216      if (x.isEmpty) x
217      else
218        lookup(x) match {
219          case Some(y) => y
220          case None => x.map(cache_tree(_))
221        }
222    }
223
224    // main methods
225    def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
226    def markup(x: Markup): Markup = synchronized { cache_markup(x) }
227    def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
228    def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
229    def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
230  }
231
232
233
234  /** XML as data representation language **/
235
236  abstract class Error(s: String) extends Exception(s)
237  class XML_Atom(s: String) extends Error(s)
238  class XML_Body(body: XML.Body) extends Error("")
239
240  object Encode
241  {
242    type T[A] = A => XML.Body
243    type V[A] = PartialFunction[A, (List[String], XML.Body)]
244    type P[A] = PartialFunction[A, List[String]]
245
246
247    /* atomic values */
248
249    def long_atom(i: Long): String = Library.signed_string_of_long(i)
250
251    def int_atom(i: Int): String = Library.signed_string_of_int(i)
252
253    def bool_atom(b: Boolean): String = if (b) "1" else "0"
254
255    def unit_atom(u: Unit) = ""
256
257
258    /* structural nodes */
259
260    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
261
262    private def vector(xs: List[String]): XML.Attributes =
263      xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) })
264
265    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
266      XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
267
268
269    /* representation of standard types */
270
271    val tree: T[XML.Tree] = (t => List(t))
272
273    val properties: T[Properties.T] =
274      (props => List(XML.Elem(Markup(":", props), Nil)))
275
276    val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
277
278    val long: T[Long] = (x => string(long_atom(x)))
279
280    val int: T[Int] = (x => string(int_atom(x)))
281
282    val bool: T[Boolean] = (x => string(bool_atom(x)))
283
284    val unit: T[Unit] = (x => string(unit_atom(x)))
285
286    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
287      (x => List(node(f(x._1)), node(g(x._2))))
288
289    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
290      (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
291
292    def list[A](f: T[A]): T[List[A]] =
293      (xs => xs.map((x: A) => node(f(x))))
294
295    def option[A](f: T[A]): T[Option[A]] =
296    {
297      case None => Nil
298      case Some(x) => List(node(f(x)))
299    }
300
301    def variant[A](fs: List[V[A]]): T[A] =
302    {
303      case x =>
304        val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
305        List(tagged(tag, f(x)))
306    }
307  }
308
309  object Decode
310  {
311    type T[A] = XML.Body => A
312    type V[A] = (List[String], XML.Body) => A
313    type P[A] = PartialFunction[List[String], A]
314
315
316    /* atomic values */
317
318    def long_atom(s: String): Long =
319      try { java.lang.Long.parseLong(s) }
320      catch { case e: NumberFormatException => throw new XML_Atom(s) }
321
322    def int_atom(s: String): Int =
323      try { Integer.parseInt(s) }
324      catch { case e: NumberFormatException => throw new XML_Atom(s) }
325
326    def bool_atom(s: String): Boolean =
327      if (s == "1") true
328      else if (s == "0") false
329      else throw new XML_Atom(s)
330
331    def unit_atom(s: String): Unit =
332      if (s == "") () else throw new XML_Atom(s)
333
334
335    /* structural nodes */
336
337    private def node(t: XML.Tree): XML.Body =
338      t match {
339        case XML.Elem(Markup(":", Nil), ts) => ts
340        case _ => throw new XML_Body(List(t))
341      }
342
343    private def vector(atts: XML.Attributes): List[String] =
344      atts.iterator.zipWithIndex.map(
345        { case ((a, x), i) => if (int_atom(a) == i) x else throw new XML_Atom(a) }).toList
346
347    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
348      t match {
349        case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
350        case _ => throw new XML_Body(List(t))
351      }
352
353
354    /* representation of standard types */
355
356    val tree: T[XML.Tree] =
357    {
358      case List(t) => t
359      case ts => throw new XML_Body(ts)
360    }
361
362    val properties: T[Properties.T] =
363    {
364      case List(XML.Elem(Markup(":", props), Nil)) => props
365      case ts => throw new XML_Body(ts)
366    }
367
368    val string: T[String] =
369    {
370      case Nil => ""
371      case List(XML.Text(s)) => s
372      case ts => throw new XML_Body(ts)
373    }
374
375    val long: T[Long] = (x => long_atom(string(x)))
376
377    val int: T[Int] = (x => int_atom(string(x)))
378
379    val bool: T[Boolean] = (x => bool_atom(string(x)))
380
381    val unit: T[Unit] = (x => unit_atom(string(x)))
382
383    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
384    {
385      case List(t1, t2) => (f(node(t1)), g(node(t2)))
386      case ts => throw new XML_Body(ts)
387    }
388
389    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
390    {
391      case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
392      case ts => throw new XML_Body(ts)
393    }
394
395    def list[A](f: T[A]): T[List[A]] =
396      (ts => ts.map(t => f(node(t))))
397
398    def option[A](f: T[A]): T[Option[A]] =
399    {
400      case Nil => None
401      case List(t) => Some(f(node(t)))
402      case ts => throw new XML_Body(ts)
403    }
404
405    def variant[A](fs: List[V[A]]): T[A] =
406    {
407      case List(t) =>
408        val (tag, (xs, ts)) = tagged(t)
409        val f =
410          try { fs(tag) }
411          catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
412        f(xs, ts)
413      case ts => throw new XML_Body(ts)
414    }
415  }
416}
417