1# $Id: RDFTriple.xotcl,v 1.6 2006/09/27 08:12:40 neumann Exp $
2package provide xotcl::rdf::triple 1.0
3
4package require XOTcl
5package require xotcl::rdf::parser
6
7namespace eval ::xotcl::rdf::triple {
8  namespace import ::xotcl::*
9
10  Class RDFTriple -parameter {
11    predicate
12    subject
13    object
14  }
15
16  RDFTriple instproc dump {} {
17    #set o [my object]; if {[info command $o] ne ""} { $o showVars  }
18    #return "P: [my predicate] S: [my subject] O: [my object]\n"
19    return "[my subject] -[my predicate]-> '[my object]'\n"
20  }
21
22  Class NodeInfo -parameter {
23    lastCurrentNode
24    {aboutEach 0}
25    {aboutEachPrefix 0}
26    topID
27    {statements ""}
28  }
29
30  Class DescriptionInfo -superclass NodeInfo -parameter {
31    {bagID 0}
32  }
33
34  Class PropertyInfo -superclass NodeInfo -parameter {
35    {reify 0}
36    generatedParentID
37  }
38
39  Class AboutEachMgr
40
41  AboutEachMgr instproc init args {
42    my array set entries {}
43    next
44  }
45
46  AboutEachMgr instproc reset {} {
47    foreach c [my info children] {$c destroy}
48    my init
49  }
50
51  AboutEachMgr instproc addEntry {name} {
52    my set entries($name) ""
53  }
54
55  AboutEachMgr instproc isEntry {name} {
56    my exists entries($name)
57  }
58
59  AboutEachMgr instproc addTriple {name p s o} {
60    if {[my exists entries($name)]} {
61      set r [RDFTriple create [self]::[my autoname name%08d]]
62      $r set predicate $p
63      $r set subject $s
64      $r set object $o
65      my lappend entries($name) $r
66      return $r
67    }
68    return ""
69  }
70
71  AboutEachMgr instproc getTriples {name} {
72    if {[my exists entries($name)]} {
73      my set entries($name)
74    } else {return ""}
75  }
76
77  Class RDFTripleDB
78  RDFTripleDB instproc add {p s o} {
79    #my showCall
80    set r [RDFTriple create [self]::[my autoname triple%08d]]
81    $r set predicate $p
82    $r set subject $s
83    $r set object $o
84    return $r
85  }
86  RDFTripleDB instproc dump {} {
87    #my showCall
88    set r ""
89    foreach fact [my info children] {append r [$fact dump]}
90    return $r
91  }
92  RDFTripleDB instproc getTriples {} {
93    # for the time being: return only children of type RDFTriple
94    set ch {}
95    foreach c [my info children] {if {[$c istype "RDFTriple"]} {lappend ch $c}}
96    return $ch
97    #my info children
98  }
99  RDFTripleDB instproc reset {} {
100    #my showCall
101    foreach c [my info children] {$c destroy}
102    my autoname -reset triple
103    #my showMsg "children after reset: <[my info children]>'"
104  }
105  # return all triples that match the subject
106  RDFTripleDB instproc querySubject {s} {
107    #my showCall
108    set r ""
109    foreach t [my info children] {
110      if {[string match $s [$t subject]]} {
111	lappend r $t
112      }
113    }
114    return $r
115  }
116
117  RDFTripleDB instproc queryPredicate {p} {
118    #my showCall
119    set r ""
120    foreach t [my info children] {
121      if {[string match $p [$t predicate]]} {
122	lappend r $t
123      }
124    }
125    return $r
126  }
127
128  RDFTripleDB instproc queryPredicateOnSubject {p s} {
129    #my showCall
130    foreach t [my querySubject $s] {
131      if {[string match $p [$t predicate]]} {
132	# there may be only one matching P on a S
133	# return the triple
134	return $t
135      }
136    }
137    return ""
138  }
139  RDFTripleDB instproc prettyTriples {} {
140    my instvar result
141    if {[my exists table]} {my unset table}
142    if {[my exists subjectPrinted]} {my unset subjectPrinted}
143    set result ""
144
145    foreach triple [lsort [my getTriples]] {
146      set subject [$triple set subject]
147      set predicate [$triple set predicate]
148      set object [$triple set object]
149
150      regexp {^http.*w3[.]org.*(\#.*)$} $predicate _ predicate
151      regexp {^http.*w3[.]org.*(\#.*)$} $object _ object
152      my lappend table($subject) $predicate $object
153    }
154    foreach subject [lsort [my array names table]] {
155      if {![regexp {^rdfdoc\#} $subject]} { my prettyStatements "" $subject }
156    }
157    set r $result; set result ""
158    foreach subject [lsort [my array names table]] {
159      if {![my exists subjectPrinted($subject)]} { 
160	my prettyStatements "" $subject 
161      }
162    }
163    if {$result ne ""} {
164      append r "\n=================== unreferenced:\n$result"
165      
166    }
167    return $r
168  }
169  RDFTripleDB instproc prettyStatement {space subject predicate object} {
170    my append result "$space   [format %-35s $subject] [format %-25s $predicate] $object\n"
171  }
172  RDFTripleDB instproc prettyStatements {space subject} {
173    if {![my exists table($subject)]} {
174      my append result "$space NO VALUE FOR $subject\n"
175    } else {
176      if {![my exists subjectPrinted($subject)]} {
177	my set subjectPrinted($subject) 1
178	foreach {predicate object} [my set table($subject)] {
179	  my prettyStatement $space $subject $predicate $object
180	  if {[regexp {^rdfdoc\#} $object]} {
181	    my prettyStatements "$space  " $object
182	  }
183	}
184      }
185    }
186  }
187
188
189  Class TripleVisitor -superclass NodeTreeVisitor -parameter {
190    {descriptionAsBag 0}
191    {currentNode ""}
192    parser
193    rdfNS
194  }
195
196  TripleVisitor instproc getInfo {} {
197    my set openNode([my set currentNode])
198  }
199
200  TripleVisitor instproc getLastInfo {info} {
201    my set openNode([$info set lastCurrentNode])
202  }
203
204  TripleVisitor instproc popInfo {objName} {
205    set i [my getInfo]
206    my set currentNode [$i set lastCurrentNode]
207    my unset openNode($objName)
208    return $i
209  }
210
211  TripleVisitor instproc pushInfo {objName ei} {
212    set lce [$ei set lastCurrentNode [my set currentNode]]
213    if {$lce ne ""} {
214      set lastInfo [my set openNode($lce)]
215      $ei aboutEach [$lastInfo aboutEach]
216      $ei aboutEachPrefix [$lastInfo aboutEachPrefix]
217    }
218    my set openNode($objName) $ei
219    my set currentNode $objName
220  }
221
222  TripleVisitor instproc qualify {obj var} {
223    [$obj resolveNS] getFullName $var
224  }
225
226  TripleVisitor instproc init args {
227    my array set openNode {{} {}}
228    RDFTripleDB create [self]::db
229    AboutEachMgr create [self]::aboutEach
230    AboutEachMgr create [self]::aboutEachPrefix
231    next
232  }
233
234  TripleVisitor instproc resetWithoutDB args {
235    [self]::aboutEach reset
236    [self]::aboutEachPrefix reset
237    next
238  }
239
240  TripleVisitor instproc reset args {
241    [self]::db reset
242    my resetWithoutDB
243    next
244  }
245
246  TripleVisitor instproc addDB {p s o} {
247    #puts "ADDDB: P<$p> S<$s> O<$o>"
248    set info [my getInfo]
249    if {$info ne ""} {
250      set topID [$info set topID]
251      if {[$info aboutEach]} {
252	return [[self]::aboutEach addTriple $topID $p $s $o]
253      } elseif {[$info aboutEachPrefix]} {
254	return [[self]::aboutEachPrefix addTriple $topID $p $s $o]
255      }
256    }
257    return [[self]::db add $p $s $o]
258  }
259
260  TripleVisitor instproc checkReification {triple node} {
261    # for statements that nest inside a description/property, we remember
262    # the statement to be able to reify them
263    # (e.g., bag created for description)
264    if {$triple ne "" && $node ne ""} {
265      set info [my set openNode($node)]
266      if {[my isobject $info] && [$info istype NodeInfo]} {
267	${info} lappend statements $triple
268      }
269    }
270  }
271
272  TripleVisitor instproc qualifyWithBaseURL v {
273    if {[string match "\#*" $v]} {
274      return [[my set parser] baseURL]$v
275    }
276    return $v
277  }
278
279  TripleVisitor instproc RDFTag {objName} {
280    set ns [$objName resolveNS]
281    set rdfNS [$ns searchNamespaceByPrefix rdf]
282    if {$rdfNS eq ""} {
283      set rdfNS [$ns searchNamespaceByPrefix xmlns]
284    }
285    my set rdfNS $rdfNS
286  }
287  TripleVisitor instproc DescriptionNode objName {
288    set di [DescriptionInfo create [self]::[my autoname di]]
289    $di topID [my qualifyWithBaseURL [$objName getSubject]]
290    my pushInfo $objName $di
291    #
292    # if a description nests inside a Member, we need a triple
293    # for the member index (connected to the Description topId)
294    #
295    if {[namespace tail [[set member [$objName info parent]] info class]] \
296	    == "RDFMember"} {
297      set bag_ID [[$member info parent] set ID]
298      my addDB [my qualify $objName [$member set memberIndex]] \
299	  $bag_ID [$di set topID] 
300    }
301  }
302
303  TripleVisitor instproc handlePCData {objName pcdata} {
304    set info [my getInfo]
305
306    if {[set lcn [$info set lastCurrentNode]] == ""} {
307      #puts stderr "cannot determine lastCurrentNode from $info"
308      #$info showVars
309      set selector ""
310    } else {
311      set selector [namespace tail [$lcn info class]]
312    }
313    
314    switch -exact $selector {
315      RDFDescription {
316	set triple [my addDB \
317			[my qualify $objName [$objName set content]] \
318			[$info set topID] $pcdata]
319	my checkReification $triple $lcn
320      }
321      RDFProperty {
322	if {[set rAttr [$lcn getRDFAttribute resource]] != ""} {
323	  set triple [my addDB \
324			  [my qualify $objName [$objName set content]] \
325			  [$lcn set $rAttr] $pcdata]
326	  #$lcn showVars
327	} else {
328	  set lastInfo [my getLastInfo $info]
329	  if {[$lastInfo exists generatedParentID]} {
330	    set parentID [$lastInfo set generatedParentID]
331	  } else {
332	    set parentID [[$objName info parent] set ID]
333	  }
334	  #set parentID [$lastInfo set generatedParentID]
335	  set triple [my addDB \
336			  [my qualify $objName [$objName set content]] \
337			  $parentID $pcdata]
338	}
339      }
340      default {
341	#puts stderr "create a generatedParentID for reification"
342	$info set generatedParentID [[my set parser] makeID]
343	set triple [my addDB \
344			[my qualify $objName [$objName set content]] \
345			[$info set generatedParentID] $pcdata]
346	my checkReification $triple [my set currentNode]
347      }
348    }
349    $info set tripleWritten 1
350  }
351
352  TripleVisitor instproc Property objName {
353    set info [PropertyInfo create [self]::[my autoname pi]]
354    ## if we find no subject and are in Container ->
355    ## reifiy over generatedParentID
356    set propSubject [$objName getSubject]
357
358    $info topID [my qualifyWithBaseURL $propSubject]
359    my pushInfo $objName $info
360    
361    if {[$objName exists pcdata]} {
362      my handlePCData $objName [$objName getFirstPCData]
363    } 
364  }
365
366  TripleVisitor instproc ContainerNode objName {
367    set ID [my qualifyWithBaseURL [$objName set ID]]
368    foreach t [$objName array names rdfTypes] {
369      my addDB [my qualify $objName \
370		    [$objName qualifyWithRdfNsPrefix type]] $ID $t
371    }
372  }
373
374  TripleVisitor instproc Member objName {
375    set container [$objName info parent]
376    set resource [$objName qualifyWithRdfNsPrefix resource]
377    set parseType [$objName qualifyWithRdfNsPrefix parseType]
378    if {[$objName exists pcdata]} {
379      set co [$objName getFirstPCData]
380    } elseif {[$objName exists attributes(resource)]} {
381      set co [$objName set attributes(resource)]
382    } elseif {[$objName exists attributes($resource)]} {
383      set co [$objName set attributes($resource)]
384    }
385    #puts stderr "CONTAINER = [info exists co]"
386    if {[info exists co]} {
387      my addDB \
388	  [my qualify $container [$objName set memberIndex]] \
389	  [$container set ID] $co
390    } else {
391      #$objName showVars
392    }
393  }
394
395  TripleVisitor instproc visit objName {
396    set cl [namespace tail [$objName info class]]
397    $objName instvar attributes
398    set triple ""
399
400    #puts "********Visit $objName -- $cl"
401
402    switch -exact $cl {
403      RDFTag 		{my RDFTag $objName}
404      RDFDescription 	{my DescriptionNode $objName}
405      RDFProperty 	{my Property $objName}
406      RDFBag - RDFSeq - RDFAlt {my ContainerNode $objName}
407      RDFMember 	{my Member $objName}
408    }
409
410    foreach a [array names attributes] {
411      regexp "^([$objName set rdfNSPrefix]:|)(.*)" $a _ __ an
412      switch -exact $an {
413	bagID {
414	  set info [my getInfo]
415	  $info set bagID 1
416	}
417	aboutEach {
418	  set info [my getInfo]
419	  if {[DescriptionInfo info instances $info] eq ""} {
420	    error "AboutEach not in description"
421	  }
422	  $info aboutEach 1
423	  [self]::aboutEach addEntry [my qualifyWithBaseURL [$objName getSubject]]
424	}
425	aboutEachPrefix {
426	  set info [my getInfo]
427	  if {[DescriptionInfo info instances $info] eq ""} {
428	    error "AboutEachPrefix not in description"
429	  }
430	  $info aboutEachPrefix 1
431	  [self]::aboutEachPrefix addEntry [my qualifyWithBaseURL [$objName getSubject]]
432	}
433	resource {
434	  if {$cl eq "RDFProperty"} {
435	    my handlePCData $objName [set attributes($a)]
436	  }
437	}
438      }
439    }
440  }
441
442  TripleVisitor instproc reificate {objName p s o} {
443    set memberID [[my set parser] makeID]
444    my addDB [my qualify $objName \
445		  [$objName qualifyWithRdfNsPrefix predicate]] $memberID $p
446    my addDB [my qualify $objName \
447		  [$objName qualifyWithRdfNsPrefix subject]] $memberID $s
448    my addDB [my qualify $objName \
449		  [$objName qualifyWithRdfNsPrefix object]] $memberID $o
450    my addDB [my qualify $objName \
451		  [$objName qualifyWithRdfNsPrefix type]] $memberID \
452	[my qualify $objName [$objName qualifyWithRdfNsPrefix Statement]]
453    return $memberID
454  }
455
456  TripleVisitor instproc visitEnd objName {
457    switch -exact [namespace tail [$objName info class]] {
458      RDFDescription {
459	set di [my popInfo $objName]
460	if {[my descriptionAsBag] || [$di set bagID]} {
461	  set bagID [$objName set bagID]
462	  my addDB [my qualify $objName [$objName qualifyWithRdfNsPrefix type]] \
463	      $bagID [my qualify $objName [$objName qualifyWithRdfNsPrefix Bag]]
464	  
465	  set bagCount 0
466	  
467	  foreach s [$di set statements] {
468	    set memberID [my reificate $objName \
469			      [$s set predicate] [$s set subject] [$s set object]]
470	    my addDB [my qualify $objName \
471			  [$objName qualifyWithRdfNsPrefix _[incr bagCount]]] \
472		$bagID $memberID
473	  }
474	}
475	foreach t [$objName array names rdfTypes] {
476	  my addDB [my qualify $objName [$objName qualifyWithRdfNsPrefix "type"]] \
477	      [$objName getSubject] $t
478	}
479	$di destroy
480      }
481      RDFProperty {
482	set info [my popInfo $objName]
483	if {![$info exists tripleWritten]} {
484	  set triple ""
485	  foreach fc [$objName info children] {
486	    switch -exact [namespace tail [$fc info class]] {
487	      RDFDescription {
488		set triple [my addDB \
489				[my qualify $objName [$objName set content]] \
490				[my qualifyWithBaseURL [$objName getSubject]] [$fc getSubject]]
491		break
492	      }
493	      RDFBag - RDFSeq - RDFAlt {
494		set triple [my addDB \
495				[my qualify $objName [$objName set content]] \
496				[my qualifyWithBaseURL [$objName getSubject]] [$fc set ID]]
497		break
498	      }
499	    }
500	  }
501	  if {$triple ne ""} {
502	    my checkReification $triple [my set currentNode]
503	  }
504	}
505	$info destroy
506      }
507    }
508  }
509
510  TripleVisitor instproc evaluateAboutEach {} {
511    set triplesWritten ""
512    set rdfNSFullName [[my rdfNS] searchPrefix rdf]
513
514    foreach entry [[self]::aboutEach array names entries] {
515      # matching entry triples should be bag types and their
516      # members -> duplication of aboutEach statements for the
517      # members
518      foreach entryTriple [lsort [[self]::db querySubject $entry]] {
519	if {[regexp "^${rdfNSFullName}_\[0-9\]*$" [$entryTriple predicate]]} {
520	  foreach t [[self]::aboutEach getTriples $entry] {
521	    set subject [$t subject]
522	    # if this is a toplevel elt of an about each tree -> its
523	    # subject is the object of the container member
524	    if {$subject eq $entry} {
525	      [self]::db add [$t predicate] [$entryTriple object] [$t object]
526	    } elseif {[lsearch $triplesWritten $t] == -1} {
527	      [self]::db add [$t predicate] $subject [$t object]
528	      lappend triplesWritten $t
529	    }
530	  }
531	}
532      }
533    }
534  }
535
536  TripleVisitor instproc interpretNodeTree {node} {
537    my set parser [$node set parser]
538    $node accept [self]
539    my evaluateAboutEach
540  }
541
542  namespace export RDFTriple NodeInfo DescriptionInfo PropertyInfo \
543      AboutEachMgr RDFTripleDB TripleVisitor
544}
545namespace import ::xotcl::rdf::triple::*
546