1# $Id: xoRDF.xotcl,v 1.4 2005/09/09 21:09:01 neumann Exp $
2package provide xotcl::rdf::parser 1.0
3
4package require XOTcl
5package require xotcl::xml::parser
6#package require xotcl::pattern::link
7package require xotcl::trace
8
9namespace eval ::xotcl::rdf::parser {
10  namespace import ::xotcl::*
11
12  ##############################################################################
13  #
14  #  RDF Parse Type Handling for RDF Node Class and RDF Parser class
15  #  to be used as mixin. Here, we have decomposed the parse type handling
16  #
17  ##############################################################################
18
19  #
20  #  Nodes just call "isParseLiteral", "isParseResource", and "handleParseType"
21  #  by their template methods -> mixins concretizes implementation
22  #
23  Class RDFNodeParseTypeHandling
24
25  #
26  # parseType=literal nodes are not parsed, but handled as literals
27  # -> the XML parser should parse these nodes -> we have cut them off
28  # if we encounter "parseType = literal" nextParsedLiterals searches the
29  # parseLiterals array and returns the content
30  #
31  RDFNodeParseTypeHandling instproc nextParsedLiterals {} {
32    set parser [my set parser]
33    $parser set parseLiterals([$parser incr parseLiteralsCount])
34  }
35
36  #
37  # handle attributes that determine the parse type
38  #
39  RDFNodeParseTypeHandling instproc handleParseType value {
40    if {$value eq "Resource"} {
41      my set parseResource 1
42    } else {
43      # with RDF 1.0 all values other than Resource are treated
44      # as parseType = literal
45      my set pcdata [list "" [my nextParsedLiterals]]
46      my set parseLiteral 1
47    }
48  }
49
50  #
51  # two convinience methods that tell us whether the parse type is literal/resource
52  #
53  RDFNodeParseTypeHandling instproc isParseLiteral {} {
54    #
55    # if the parse literal var is set -> one child
56    # is of type ParseTypeLiteral !
57    #
58    my exists parseLiteral
59  }
60  RDFNodeParseTypeHandling instproc isParseResource {} {
61    #
62    # if the parseResource var is set -> one child
63    # is of type ParseTypeResource !
64    #
65    my exists parseResource
66  }
67
68  #
69  # and we overload the Parser's parse method in order to cut off
70  # all parseType = "Literal", because we have to hinder the XML
71  # parser to parse RDF text that is marked as parseType = literal
72  # we store the result in an array "parseLiterals" that is used
73  # by the RDFNodeParseTypeHandling Mixin
74  #
75  Class RDFParserParseTypeHandling
76  RDFParserParseTypeHandling instproc parse data {
77    my array set parseLiterals {}
78    my set parseLiteralsCount 0
79    set count 0
80
81    set dt $data
82
83    while {[set pt [string first "parseType" $dt]] != -1} {
84      # we cut the string off manually, because a regexp is slower
85      if {$::tcl_version > 8.0} {
86	set last [string first "=" $dt $pt]
87      } else {
88	set last [string first "=" [string range $dt $pt end]]
89	incr last $pt
90      }
91      set ptStart [expr {[string last "<" [string range $dt 0 $pt]] + 1}]
92      set propName [string range $dt $ptStart $pt]
93      set blank [string first " " $propName]
94      if {$blank != -1} {
95	set propName [string range $propName 0 [expr {$blank -1}]]
96      }
97      set dt [string range $dt $last end]
98      # All parse types != Resource treated as literals
99      if {![regexp {^= *[\"']Resource} $dt]} {
100	regexp -indices ">" $dt idx
101	set start [lindex $idx 1]
102	if {[regexp -indices "</$propName>" $dt idx]} {
103	  set endTagLeft [lindex $idx 0]
104	  set literal [string range $dt [expr {$start + 1}] [expr {$endTagLeft - 1}]]
105	  set dt [string range $dt $endTagLeft end]
106	  my set parseLiterals([incr count]) $literal
107	} else {
108	  error "end tag for $propName missing"
109	}
110      }
111    }
112    next $data
113  }
114
115  ##############################################################################
116  #
117  #  RDFNode Node Class
118  #
119  ##############################################################################
120
121  Class RDFNode -superclass XMLNode -parameter {
122    subject
123    {rdfNSPrefix ""}
124  }
125  @ Class RDFNode -superclass XMLNode {
126    description {
127      general superclass for RDF nodes
128      common properties
129    }
130  }
131
132  #
133  # add mixins for parse type handling
134  #
135  RDFNode instproc init args {
136    next
137    my mixin add RDFNodeParseTypeHandling
138    set p [my info parent]
139    if {[$p exists rdfNSPrefix]} {
140      my set rdfNSPrefix [$p set rdfNSPrefix]
141      #puts stderr "RDF Prefix defined in [self]->init to [$p set rdfNSPrefix]" 
142    }
143  }
144
145  RDFNode instproc parseData {text} {
146    if {[my isParseLiteral]} {return}
147    next
148  }
149
150  #
151  # try to find the "subject" of the RDF statement ->
152  # if it not found on the actual node search the parents
153  #
154  # per default subject is ""; subclasses add subjects,
155  # when they encounter ID, about, ... attrs
156  #
157  RDFNode instproc getSubject {} {
158    for {set o [self]} {![$o istype RDFTag]} {set o [$o info parent]} {
159      if {[$o exists subject]} {return [$o set subject]}
160    }
161    return ""
162  }
163
164
165  #
166  # lets the parser construct an unique ID in the parser
167  #
168  RDFNode instproc makeID {} {
169    [my set parser] makeID
170  }
171
172  #
173  # abstract methods that have to be concretized with parse type handling
174  # by a parse type mixin (or in subclass)
175  #
176  RDFNode abstract instproc isParseLiteral {}
177  RDFNode abstract instproc isParseResource {}
178  RDFNode abstract instproc handleParseType value
179
180  RDFNode instproc appendRDFType t {
181    set t [[my resolveNS] getFullName $t]
182    my set rdfTypes($t) 1
183  }
184
185  #
186  # get a typed node abbreviation -> convert it to
187  # a description + a nested rdf:type property
188  #
189  RDFNode instproc getTypedNode {name attrList} {
190    set r [my getNestingNode RDFDescription \
191	       [my qualifyWithRdfNsPrefix Description] $attrList]
192    $r appendRDFType $name
193    set r
194  }
195
196  #
197  # try to parse children corresponding to parse type or if none is given
198  # try to parse a child of type obj -> Description or Container
199  #
200  RDFNode instproc parseNestedChild {name attrList} {
201    if {[my isParseResource]} {
202      if {![my exists resourceDescription]} {
203	my set resourceDescription \
204	    [my getNestingNode RDFDescription \
205		 [my qualifyWithRdfNsPrefix Description] {}]
206	# we have resolved parseType="resource" with a description
207	# -> remove parse type attribute info ... it is not correct anymore,
208	# but remember parseResource flag
209	if {[my exists attributes(parseType)]} {
210	  my unset attributes(parseType)
211	}
212	if {[my exists attributes([set parseType [my qualifyWithRdfNsPrefix parseType]])]} {
213	  my unset attributes($parseType)
214	}
215      }
216      
217      set r [[my set resourceDescription] getPropertyNodeChild $name $attrList]
218    } elseif {[my isParseLiteral]} {
219      set r [self]
220      # literal -> do nothing
221    } else {
222      if {[set node [my isNestingNode $name]] ne ""} {
223	set r [my getNestingNode $node $name $attrList]
224      } else {
225	set r [my getTypedNode $name $attrList]
226      }
227    }
228    return $r
229  }
230
231  #
232  # step forward in the attrList
233  #
234  RDFNode instproc nextAttrNode {node attrList index} {
235    upvar [self callinglevel] $index i $attrList a
236    if {$node ne ""} {
237      set a [lreplace $a $i [expr {$i + 1}]]
238    } else {
239      incr i 2
240    }
241  }
242
243  #
244  # create a child node of Property type and return it
245  #
246  # don't build a node for "type" properties, but append them to
247  # the list
248  #
249  RDFNode instproc getPropertyNodeChild {name attrList} {
250    regexp "^[my set rdfNSPrefix]:(.*)" $name _ name
251    set parser [my set parser]
252    if {$name eq "type" && [my istype RDFResource]} {
253      # seek for resource attribute and append type to list
254      set rp [my prependRDFPrefix resource]
255      set rdfns [$parser set rdfNamespace]
256      foreach {n v} $attrList {
257	if {![my istype RDFContainerNodeClass]} {
258	  if {$n eq $rp || $n eq "resource"} {
259	    foreach c {Bag Alt Seq} {
260	      if {$v eq "$rdfns$c"} {
261		my class RDF$c
262		my set memberNr 0
263		my set ID [my set bagID]
264		my unset bagID
265		my set content [my prependRDFPrefix $c]
266		# reclass existing li props to member
267		set li [my prependRDFPrefix li]
268		foreach child [lsort [my info children]] {
269		  if {[namespace tail [$child info class]] eq "RDFProperty"} {
270		    if {[$child set content] eq $li || 
271			[$child set content] eq "li"} {
272		      $child class RDFMember
273		      my giveMemberNr $child
274		      $child set content $li
275		    }
276		  }
277		}
278	      }
279	    }
280	  }
281	}
282	my appendRDFType $v
283      }
284      return [self]
285    } else {
286      set nf [$parser set nodeFactory]
287      set r [$nf getNode RDFProperty [self]::[my nextChild prop] $parser]
288      $r set content $name
289      $r parseAttributes $name $attrList
290      set r
291    }
292  }
293
294  #
295  # property in abbr syntax (as attribute)
296  #
297  RDFNode instproc propertyAttribute {n v} {
298    set r [my getPropertyNodeChild $n ""]
299    $r parseData $v
300    set r
301  }
302
303  #
304  # check whether an attribute name matches an attributed RDFNode
305  # of this class or not
306  # return the corresponding node class
307  #
308  RDFNode instproc isAttribute {n} {
309    regexp "^[my set rdfNSPrefix]:(.*)" $n _ n
310    if {[lsearch [[my info class] set attributeList] $n] != -1} {
311      return $n
312    } elseif {$n eq "xml:lang"} {
313      # we create attribute for xml_lang (for recreation purposes)
314      return $n
315    }
316    return ""
317  }
318
319  #
320  # check if name matches an node class that may be nested in [self]
321  #
322  RDFNode instproc isNestingNode {n} {
323    regexp "^[my set rdfNSPrefix]:(.*)" $n _ n
324    set cl [my info class]
325    if {[$cl exists nestingList($n)]} {
326      return [$cl set nestingList($n)]
327    }
328    return ""
329  }
330
331  RDFNode instproc getNestingNode {node name attrList} {
332    set parser [my set parser]
333    set nf [$parser set nodeFactory]
334    switch [namespace tail $node] {
335      "RDFMember" - "RDFProperty" {set objName prop} 
336      default {set objName res}
337    }
338    set r [$nf getNode $node [self]::[my nextChild $objName] $parser]
339    $r set content $name
340    $r parseAttributes $name $attrList
341    set r
342  }
343
344  #
345  # check whether the RDF namespace is redefined to another prefix
346  #
347  RDFNode instproc makeIndividualNSEntry {prefix entry} {
348    if {$entry eq [[my set parser] rdfNamespace]} {
349      if {[my set rdfNSPrefix] eq "" || $prefix ne "xmlns"} {
350	my set rdfNSPrefix $prefix
351      }
352      #puts stderr "RDF Prefix redefined in [self] to $prefix"
353    }
354    next
355  }
356
357  RDFNode instproc qualifyWithRdfNsPrefix t {
358    set ns [my set rdfNSPrefix]
359    if {$ns eq "xmlns"} {return $t}
360    return $ns:$t
361  }
362
363  #
364  # checks whether a given attribute is part of the attributes array
365  # and returns the varname, otherwise ""
366  #
367  RDFNode instproc getAttribute {n nsFullName} {
368    set ns [my resolveNS]
369    set xmlns [$ns searchPrefix xmlns]
370    if {$xmlns eq $nsFullName && [my exists attributes($n)]} {
371      return attributes($n)
372    }
373    set prefix [$ns searchFullName $nsFullName]
374    if {$prefix ne "" &&
375	[my exists attributes($prefix:$n)]} {
376      return attributes($prefix:$n)
377    }
378    return ""
379  }
380
381  #
382  # searches for attribute "n" with rdf namespace prefix
383  #
384  RDFNode instproc getRDFAttribute {n} {
385    if {[my exists attributes($n)]} {
386      return [my set attributes($n)]
387    }
388    set rdfNSPrefix [my set rdfNSPrefix]
389    if {$rdfNSPrefix ne "xmlns"} {
390      set n $rdfNSPrefix:$n
391      if {[my exists attributes($n)]} {
392	return [my set attributes($n)]
393      }
394    }
395    return ""
396  }
397
398  RDFNode instproc prependRDFPrefix ts {
399    set rdfNSPrefix [my set rdfNSPrefix]
400    if {$rdfNSPrefix ne "xmlns"} {set ts $rdfNSPrefix:$ts}
401    return $ts
402  }
403
404  ##############################################################################
405  #
406  # superclass for all resources (like Description, Alt, Seq, Beg)
407  # used directly in the parse tree ... resource nodes are mixed in
408  #
409  ##############################################################################
410
411  Class RDFResource -superclass RDFNode
412
413  RDFResource instproc print {} {
414    set t [my array names rdfTypes]
415    if {$t eq ""} {return [next]} else {return "[next]\nTYPES: $t"}
416  }
417
418
419  ##############################################################################
420  #
421  # superclasses for container node classes (alt seq bag)
422  #
423  ##############################################################################
424  Class RDFContainerNodeClass -superclass RDFResource
425
426  RDFContainerNodeClass instproc init args {
427    # cache the member number
428    # 0 inidicates, there is currently no member
429    next
430
431    my set memberNr 0
432    my set ID [my makeID]
433    my appendRDFType [my qualifyWithRdfNsPrefix \
434			  [[my info class] set content]]
435  }
436
437  RDFContainerNodeClass instproc parseAttributes {name attrList} {
438    #set index 0
439    foreach {n v} $attrList {
440      if {[set an [my isAttribute $n]] ne ""} {
441	my set attributes($n) $v
442	if {$an eq "ID"} {	
443	  my set subject $v
444	  my set ID [[my set parser] set baseURL]\#$v
445	}
446      }
447      #set attrList [my nextAttrNode $an attrList index]
448    }
449  }
450
451  RDFContainerNodeClass instproc giveMemberNr {member} {
452    set pf [my getContentPrefix]
453    if {$pf ne ""} {append pf ":"}
454    $member set memberIndex "${pf}_[my incr memberNr]"
455  }
456
457  RDFContainerNodeClass instproc parseStart {name attrList} {
458    set r [self]
459    next
460    if {[set node [my isNestingNode $name]] ne ""} {
461      set r [my getNestingNode $node $name $attrList]
462      if {[namespace tail [$r info class]] eq "RDFMember"} {
463	my giveMemberNr $r
464      }
465    } else {
466      set r [my getPropertyNodeChild $name $attrList]
467    }
468    return $r
469  }
470
471  ##############################################################################
472  #
473  # Concrete Factory for creating RDF-style nodes
474  #
475  ##############################################################################
476  Class RDFNodeClassFactory -superclass XMLNodeClassFactory
477  RDFNodeClassFactory instproc content content {
478    my set content $content
479  }
480  RDFNodeClassFactory instproc attributeList attributeList {
481    my set attributeList $attributeList
482  }
483  RDFNodeClassFactory instproc nestingTo nestingTo {
484    set name [string trimleft [self] :]
485    foreach cl $nestingTo {
486      $cl set nestingList([my set content]) $name
487    }
488  }
489
490  RDFNodeClassFactory proc create args {
491    # create the class
492    set name [next]
493    switch -exact $name {
494      RDFDescription - RDFProperty - RDFMember {
495	my array set attributeList {}
496      }
497      RDFMember - RDFProperty {
498	my array set nestingList {}
499      }
500    }
501  }
502  ##########################################################################
503  #
504  # now create a factory and build all the node classes
505  # needed for the RDF Parser/Interpreter
506  #
507  ##########################################################################
508  RDFNodeClassFactory proc createFactories {} {
509    foreach {name superclasses content attributeList} {
510      RDFTag 	  RDFNode		        RDF     {}
511      RDFBag 	  RDFContainerNodeClass 	Bag     {ID}
512      RDFSeq 	  RDFContainerNodeClass 	Seq     {ID}
513      RDFAlt 	  RDFContainerNodeClass 	Alt     {ID}
514      RDFProperty RDFNode	    	""      {bagID ID resource parseType}
515      RDFMember   RDFProperty           li      {resource parseType}
516      RDFDescription  RDFResource	Description {ID bagID about type aboutEach aboutEachPrefix}
517    } {
518      #puts "Create class: $name -superclass $superclasses"
519      RDFNodeClassFactory create $name -superclass $superclasses \
520	  -content $content \
521	  -attributeList $attributeList
522    }
523  }
524  RDFNodeClassFactory createFactories
525
526  #
527  # define nesting constraints
528  #
529  RDFTag nestingTo {}
530  RDFBag nestingTo {RDFTag RDFProperty}
531  RDFSeq nestingTo {RDFTag RDFProperty}
532  RDFAlt nestingTo {RDFTag RDFProperty}
533  RDFMember nestingTo {RDFContainerNodeClass RDFBag RDFSeq RDFAlt}
534  RDFProperty nestingTo {}
535  RDFDescription nestingTo {RDFTag RDFMember RDFProperty}
536
537  ##############################################################################
538  #
539  # add some methods to the property node class
540  #
541  ##############################################################################
542
543  RDFProperty instproc parseAttributes {name attrList} {
544    set r [self]
545    #set index 0
546    foreach {n v} $attrList {
547      if {[my checkForXmlNS $n $v]} {continue}
548      if {[set an [my isAttribute $n]] ne ""} {
549	my set attributes($n) $v
550	if {$an eq "parseType"} {my handleParseType $v}
551      } else {
552	if {![info exists abbrvPropResource]} {
553	  set abbrvPropResource \
554	      [my getNestingNode RDFDescription \
555		   [my qualifyWithRdfNsPrefix Description] {}]
556	}
557	$abbrvPropResource propertyAttribute $n $v
558      }
559      #set attrList [my nextAttrNode $an attrList index]
560    }
561
562    if {[info exists abbrvPropResource]} {
563      # if resource attribute is given -> use it for abbr property 
564      # description as about attr  
565      if {[my exists attributes(resource)]} {
566	set about [my set attributes(resource)]
567	my unset attributes(resource)
568      }
569      if  {[my exists attributes([set resource [my qualifyWithRdfNsPrefix resource]])]} {
570	set about [my set attributes($resource)]
571	my unset attributes($resource)
572      }
573      if {[info exists about]} {
574	$abbrvPropResource set attributes(about) $about
575	$abbrvPropResource set subject $about
576      }
577    }
578  }
579  RDFProperty instproc parseStart {name attrList} {
580    if {[my isParseLiteral]} {return [self]}
581    next
582    return [my parseNestedChild $name $attrList]
583  }
584
585  ##############################################################################
586  #
587  # add methods to the member class
588  #
589  ##############################################################################
590
591  RDFMember parameter {
592    memberIndex
593  }
594
595  RDFMember instproc parseAttributes {name attrList} {
596    #set index 0
597    foreach {n v} $attrList {
598      if {[set an [my isAttribute $n]] ne ""} {
599	my set attributes($n) $v
600	if {$an eq "parseType"} {my handleParseType $v}
601      }
602      #set attrList [my nextAttrNode $an attrList index]
603    }
604  }
605
606  RDFMember instproc print {} {
607    return "[next]\nMEMBER-INDEX: [my set memberIndex]"
608  }
609
610  ##############################################################################
611  #
612  # add methods to the description node class
613  #
614  ##############################################################################
615
616  RDFDescription instproc init {args} {
617    next
618    set ID [my makeID]
619    my set subject $ID
620    my set bagID $ID
621  }
622
623  RDFDescription instproc parseAttributes {name attrList} {
624    set r [self]
625
626    # if the parent is a property with an ID -> use it
627    # as description subject
628    set ID [my qualifyWithRdfNsPrefix ID]
629    set parent [my info parent]
630    if {[$parent exists attributes(ID)]} {
631      my set subject [$parent set attributes(ID)]
632    } elseif {[$parent exists attributes($ID)]} {
633      my set subject [$parent set attributes($ID)]
634    }
635
636    foreach {n v} $attrList {
637      if {[my checkForXmlNS $n $v]} {continue}
638      if {[set an [my isAttribute $n]] ne ""} {
639	my set attributes($n) $v
640	switch -exact $an {
641	  about -
642	  ID -
643	  aboutEach -
644	  aboutEachPrefix {
645	    my set subject $v
646	  }
647	  bagID {
648	    my set bagID [[my set parser] set baseURL]\#$v
649	  }
650	  type {
651	    my appendRDFType $v
652	  }
653	}
654      } else {
655	set r [my propertyAttribute $n $v]
656      }
657    }
658    return $r
659  }
660
661  RDFDescription instproc parseStart {name attrList} {
662    next
663    return [my getPropertyNodeChild $name $attrList]
664  }
665
666  ##############################################################################
667  #
668  # add some methods to the <RDF> node class
669  #
670  ##############################################################################
671
672  RDFTag parameter {{startTagOn 0}}
673
674  RDFTag instproc match {c} {
675    # the prefix of the topnode determines initially how the RDF 
676    # namespace is named ... since several examples don't have a 
677    # namespace definition for this ns, we set here a default, which
678    # may be overridden by ns definitions in the XML text
679    if {[regexp {^([^:]*):(.*)} $c _ pre c]} {
680      my makeIndividualNSEntry $pre [[my set parser] rdfNamespace]
681      #puts stderr "Making RDF namespace entry for <$pre>"
682    }
683    #puts "Match for $c --- Content: [[my info class] set content]"
684    expr {$c eq [[my info class] set content]}
685  }
686
687  RDFTag instproc parseStart {name attrList} {
688    set parsed 0
689    if {[set node [my isNestingNode $name]] ne ""} {
690      set r [my getNestingNode $node $name $attrList]
691    } else {
692      set r [my getTypedNode $name $attrList]
693    }
694    next
695    return $r
696  }
697
698  RDFTag instproc parseEnd content {
699    if {!([my startTagOn] && [my match $content])} {
700      [my errorChild $content]
701    }
702    next
703    self ;# return [self]
704  }
705
706  ##############################################################################
707  #
708  # RDF Factory for creating node objects
709  #
710  ##############################################################################
711  Class RDFNodeFactory -superclass XMLNodeFactory
712  RDFNodeFactory create rdfNodeFactory -sharedNodes {RDFDescription RDFTag}
713
714
715  ##############################################################################
716  #
717  # RDF parser class used to access the xml parser and produce the
718  # rdf node tree
719  #
720  ##############################################################################
721  Class RDFParser -superclass XMLParser -parameter {
722    {baseURL "rdfdoc"}
723    {rdfNamespace "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}
724  }
725
726  RDFParser instproc init args {
727    my mixin add RDFParserParseTypeHandling
728
729    ### this special parser handles rdf:RDF tags
730    my topLevelHandlerPattern {^([^:]*):RDF|RDF} RDFTag
731
732    next
733    my set nodeFactory "rdfNodeFactory"
734  }
735
736  RDFParser instproc makeID {} {
737    my autoname [my baseURL]\#id
738  }
739
740  RDFParser instproc reset {} {
741    next
742    set id [my baseURL]\#id
743    my autoname -reset $id
744  }
745
746  RDFParser instproc createTopLevelNode {name attrList} {
747    set tn [next]
748    #$tn makeIndividualNSEntry xmlns [my set rdfNamespace]
749    ### toplevel node must be of type RDFTag
750    if {![$tn istype RDFTag]} {
751      error "Top level node must be of type RDFTag"
752    }
753    if {[$tn match $name]} {
754      $tn set content $name
755      $tn startTagOn 1
756
757      ### use default values for rdf/default (xmlns) namespace
758      #my makeIndividualNSEntry rdfs "http://www.w3.org/TR/1999/PR-rdf-schema-19990303#"
759
760      foreach {n v} $attrList {
761	if {[$tn checkForXmlNS $n $v]} {continue}
762      }
763    }
764    return $tn
765  }
766
767  #RDFParser instproc parse data {
768  #  next
769  #}
770
771  namespace export RDFNodeParseTypeHandling RDFParserParseTypeHandling \
772      RDFNode RDFResource RDFContainerNodeClass RDFNodeClassFactory \
773      RDFNodeFactory RDFParser rdfNodeFactory \
774      RDFTag RDFBag RDFSeq RDFAlt RDFProperty  RDFMember RDFDescription
775}
776
777namespace import ::xotcl::rdf::parser::*
778