1# $Id: xoRDF.xotcl,v 1.4 2005/09/09 21:09:01 neumann Exp $ 2package provide xotcl::rdf::parser 1.0 3 4package require XOTcl 5package require xotcl::xml::parser 6#package require xotcl::pattern::link 7package require xotcl::trace 8 9namespace eval ::xotcl::rdf::parser { 10 namespace import ::xotcl::* 11 12 ############################################################################## 13 # 14 # RDF Parse Type Handling for RDF Node Class and RDF Parser class 15 # to be used as mixin. Here, we have decomposed the parse type handling 16 # 17 ############################################################################## 18 19 # 20 # Nodes just call "isParseLiteral", "isParseResource", and "handleParseType" 21 # by their template methods -> mixins concretizes implementation 22 # 23 Class RDFNodeParseTypeHandling 24 25 # 26 # parseType=literal nodes are not parsed, but handled as literals 27 # -> the XML parser should parse these nodes -> we have cut them off 28 # if we encounter "parseType = literal" nextParsedLiterals searches the 29 # parseLiterals array and returns the content 30 # 31 RDFNodeParseTypeHandling instproc nextParsedLiterals {} { 32 set parser [my set parser] 33 $parser set parseLiterals([$parser incr parseLiteralsCount]) 34 } 35 36 # 37 # handle attributes that determine the parse type 38 # 39 RDFNodeParseTypeHandling instproc handleParseType value { 40 if {$value eq "Resource"} { 41 my set parseResource 1 42 } else { 43 # with RDF 1.0 all values other than Resource are treated 44 # as parseType = literal 45 my set pcdata [list "" [my nextParsedLiterals]] 46 my set parseLiteral 1 47 } 48 } 49 50 # 51 # two convinience methods that tell us whether the parse type is literal/resource 52 # 53 RDFNodeParseTypeHandling instproc isParseLiteral {} { 54 # 55 # if the parse literal var is set -> one child 56 # is of type ParseTypeLiteral ! 57 # 58 my exists parseLiteral 59 } 60 RDFNodeParseTypeHandling instproc isParseResource {} { 61 # 62 # if the parseResource var is set -> one child 63 # is of type ParseTypeResource ! 64 # 65 my exists parseResource 66 } 67 68 # 69 # and we overload the Parser's parse method in order to cut off 70 # all parseType = "Literal", because we have to hinder the XML 71 # parser to parse RDF text that is marked as parseType = literal 72 # we store the result in an array "parseLiterals" that is used 73 # by the RDFNodeParseTypeHandling Mixin 74 # 75 Class RDFParserParseTypeHandling 76 RDFParserParseTypeHandling instproc parse data { 77 my array set parseLiterals {} 78 my set parseLiteralsCount 0 79 set count 0 80 81 set dt $data 82 83 while {[set pt [string first "parseType" $dt]] != -1} { 84 # we cut the string off manually, because a regexp is slower 85 if {$::tcl_version > 8.0} { 86 set last [string first "=" $dt $pt] 87 } else { 88 set last [string first "=" [string range $dt $pt end]] 89 incr last $pt 90 } 91 set ptStart [expr {[string last "<" [string range $dt 0 $pt]] + 1}] 92 set propName [string range $dt $ptStart $pt] 93 set blank [string first " " $propName] 94 if {$blank != -1} { 95 set propName [string range $propName 0 [expr {$blank -1}]] 96 } 97 set dt [string range $dt $last end] 98 # All parse types != Resource treated as literals 99 if {![regexp {^= *[\"']Resource} $dt]} { 100 regexp -indices ">" $dt idx 101 set start [lindex $idx 1] 102 if {[regexp -indices "</$propName>" $dt idx]} { 103 set endTagLeft [lindex $idx 0] 104 set literal [string range $dt [expr {$start + 1}] [expr {$endTagLeft - 1}]] 105 set dt [string range $dt $endTagLeft end] 106 my set parseLiterals([incr count]) $literal 107 } else { 108 error "end tag for $propName missing" 109 } 110 } 111 } 112 next $data 113 } 114 115 ############################################################################## 116 # 117 # RDFNode Node Class 118 # 119 ############################################################################## 120 121 Class RDFNode -superclass XMLNode -parameter { 122 subject 123 {rdfNSPrefix ""} 124 } 125 @ Class RDFNode -superclass XMLNode { 126 description { 127 general superclass for RDF nodes 128 common properties 129 } 130 } 131 132 # 133 # add mixins for parse type handling 134 # 135 RDFNode instproc init args { 136 next 137 my mixin add RDFNodeParseTypeHandling 138 set p [my info parent] 139 if {[$p exists rdfNSPrefix]} { 140 my set rdfNSPrefix [$p set rdfNSPrefix] 141 #puts stderr "RDF Prefix defined in [self]->init to [$p set rdfNSPrefix]" 142 } 143 } 144 145 RDFNode instproc parseData {text} { 146 if {[my isParseLiteral]} {return} 147 next 148 } 149 150 # 151 # try to find the "subject" of the RDF statement -> 152 # if it not found on the actual node search the parents 153 # 154 # per default subject is ""; subclasses add subjects, 155 # when they encounter ID, about, ... attrs 156 # 157 RDFNode instproc getSubject {} { 158 for {set o [self]} {![$o istype RDFTag]} {set o [$o info parent]} { 159 if {[$o exists subject]} {return [$o set subject]} 160 } 161 return "" 162 } 163 164 165 # 166 # lets the parser construct an unique ID in the parser 167 # 168 RDFNode instproc makeID {} { 169 [my set parser] makeID 170 } 171 172 # 173 # abstract methods that have to be concretized with parse type handling 174 # by a parse type mixin (or in subclass) 175 # 176 RDFNode abstract instproc isParseLiteral {} 177 RDFNode abstract instproc isParseResource {} 178 RDFNode abstract instproc handleParseType value 179 180 RDFNode instproc appendRDFType t { 181 set t [[my resolveNS] getFullName $t] 182 my set rdfTypes($t) 1 183 } 184 185 # 186 # get a typed node abbreviation -> convert it to 187 # a description + a nested rdf:type property 188 # 189 RDFNode instproc getTypedNode {name attrList} { 190 set r [my getNestingNode RDFDescription \ 191 [my qualifyWithRdfNsPrefix Description] $attrList] 192 $r appendRDFType $name 193 set r 194 } 195 196 # 197 # try to parse children corresponding to parse type or if none is given 198 # try to parse a child of type obj -> Description or Container 199 # 200 RDFNode instproc parseNestedChild {name attrList} { 201 if {[my isParseResource]} { 202 if {![my exists resourceDescription]} { 203 my set resourceDescription \ 204 [my getNestingNode RDFDescription \ 205 [my qualifyWithRdfNsPrefix Description] {}] 206 # we have resolved parseType="resource" with a description 207 # -> remove parse type attribute info ... it is not correct anymore, 208 # but remember parseResource flag 209 if {[my exists attributes(parseType)]} { 210 my unset attributes(parseType) 211 } 212 if {[my exists attributes([set parseType [my qualifyWithRdfNsPrefix parseType]])]} { 213 my unset attributes($parseType) 214 } 215 } 216 217 set r [[my set resourceDescription] getPropertyNodeChild $name $attrList] 218 } elseif {[my isParseLiteral]} { 219 set r [self] 220 # literal -> do nothing 221 } else { 222 if {[set node [my isNestingNode $name]] ne ""} { 223 set r [my getNestingNode $node $name $attrList] 224 } else { 225 set r [my getTypedNode $name $attrList] 226 } 227 } 228 return $r 229 } 230 231 # 232 # step forward in the attrList 233 # 234 RDFNode instproc nextAttrNode {node attrList index} { 235 upvar [self callinglevel] $index i $attrList a 236 if {$node ne ""} { 237 set a [lreplace $a $i [expr {$i + 1}]] 238 } else { 239 incr i 2 240 } 241 } 242 243 # 244 # create a child node of Property type and return it 245 # 246 # don't build a node for "type" properties, but append them to 247 # the list 248 # 249 RDFNode instproc getPropertyNodeChild {name attrList} { 250 regexp "^[my set rdfNSPrefix]:(.*)" $name _ name 251 set parser [my set parser] 252 if {$name eq "type" && [my istype RDFResource]} { 253 # seek for resource attribute and append type to list 254 set rp [my prependRDFPrefix resource] 255 set rdfns [$parser set rdfNamespace] 256 foreach {n v} $attrList { 257 if {![my istype RDFContainerNodeClass]} { 258 if {$n eq $rp || $n eq "resource"} { 259 foreach c {Bag Alt Seq} { 260 if {$v eq "$rdfns$c"} { 261 my class RDF$c 262 my set memberNr 0 263 my set ID [my set bagID] 264 my unset bagID 265 my set content [my prependRDFPrefix $c] 266 # reclass existing li props to member 267 set li [my prependRDFPrefix li] 268 foreach child [lsort [my info children]] { 269 if {[namespace tail [$child info class]] eq "RDFProperty"} { 270 if {[$child set content] eq $li || 271 [$child set content] eq "li"} { 272 $child class RDFMember 273 my giveMemberNr $child 274 $child set content $li 275 } 276 } 277 } 278 } 279 } 280 } 281 } 282 my appendRDFType $v 283 } 284 return [self] 285 } else { 286 set nf [$parser set nodeFactory] 287 set r [$nf getNode RDFProperty [self]::[my nextChild prop] $parser] 288 $r set content $name 289 $r parseAttributes $name $attrList 290 set r 291 } 292 } 293 294 # 295 # property in abbr syntax (as attribute) 296 # 297 RDFNode instproc propertyAttribute {n v} { 298 set r [my getPropertyNodeChild $n ""] 299 $r parseData $v 300 set r 301 } 302 303 # 304 # check whether an attribute name matches an attributed RDFNode 305 # of this class or not 306 # return the corresponding node class 307 # 308 RDFNode instproc isAttribute {n} { 309 regexp "^[my set rdfNSPrefix]:(.*)" $n _ n 310 if {[lsearch [[my info class] set attributeList] $n] != -1} { 311 return $n 312 } elseif {$n eq "xml:lang"} { 313 # we create attribute for xml_lang (for recreation purposes) 314 return $n 315 } 316 return "" 317 } 318 319 # 320 # check if name matches an node class that may be nested in [self] 321 # 322 RDFNode instproc isNestingNode {n} { 323 regexp "^[my set rdfNSPrefix]:(.*)" $n _ n 324 set cl [my info class] 325 if {[$cl exists nestingList($n)]} { 326 return [$cl set nestingList($n)] 327 } 328 return "" 329 } 330 331 RDFNode instproc getNestingNode {node name attrList} { 332 set parser [my set parser] 333 set nf [$parser set nodeFactory] 334 switch [namespace tail $node] { 335 "RDFMember" - "RDFProperty" {set objName prop} 336 default {set objName res} 337 } 338 set r [$nf getNode $node [self]::[my nextChild $objName] $parser] 339 $r set content $name 340 $r parseAttributes $name $attrList 341 set r 342 } 343 344 # 345 # check whether the RDF namespace is redefined to another prefix 346 # 347 RDFNode instproc makeIndividualNSEntry {prefix entry} { 348 if {$entry eq [[my set parser] rdfNamespace]} { 349 if {[my set rdfNSPrefix] eq "" || $prefix ne "xmlns"} { 350 my set rdfNSPrefix $prefix 351 } 352 #puts stderr "RDF Prefix redefined in [self] to $prefix" 353 } 354 next 355 } 356 357 RDFNode instproc qualifyWithRdfNsPrefix t { 358 set ns [my set rdfNSPrefix] 359 if {$ns eq "xmlns"} {return $t} 360 return $ns:$t 361 } 362 363 # 364 # checks whether a given attribute is part of the attributes array 365 # and returns the varname, otherwise "" 366 # 367 RDFNode instproc getAttribute {n nsFullName} { 368 set ns [my resolveNS] 369 set xmlns [$ns searchPrefix xmlns] 370 if {$xmlns eq $nsFullName && [my exists attributes($n)]} { 371 return attributes($n) 372 } 373 set prefix [$ns searchFullName $nsFullName] 374 if {$prefix ne "" && 375 [my exists attributes($prefix:$n)]} { 376 return attributes($prefix:$n) 377 } 378 return "" 379 } 380 381 # 382 # searches for attribute "n" with rdf namespace prefix 383 # 384 RDFNode instproc getRDFAttribute {n} { 385 if {[my exists attributes($n)]} { 386 return [my set attributes($n)] 387 } 388 set rdfNSPrefix [my set rdfNSPrefix] 389 if {$rdfNSPrefix ne "xmlns"} { 390 set n $rdfNSPrefix:$n 391 if {[my exists attributes($n)]} { 392 return [my set attributes($n)] 393 } 394 } 395 return "" 396 } 397 398 RDFNode instproc prependRDFPrefix ts { 399 set rdfNSPrefix [my set rdfNSPrefix] 400 if {$rdfNSPrefix ne "xmlns"} {set ts $rdfNSPrefix:$ts} 401 return $ts 402 } 403 404 ############################################################################## 405 # 406 # superclass for all resources (like Description, Alt, Seq, Beg) 407 # used directly in the parse tree ... resource nodes are mixed in 408 # 409 ############################################################################## 410 411 Class RDFResource -superclass RDFNode 412 413 RDFResource instproc print {} { 414 set t [my array names rdfTypes] 415 if {$t eq ""} {return [next]} else {return "[next]\nTYPES: $t"} 416 } 417 418 419 ############################################################################## 420 # 421 # superclasses for container node classes (alt seq bag) 422 # 423 ############################################################################## 424 Class RDFContainerNodeClass -superclass RDFResource 425 426 RDFContainerNodeClass instproc init args { 427 # cache the member number 428 # 0 inidicates, there is currently no member 429 next 430 431 my set memberNr 0 432 my set ID [my makeID] 433 my appendRDFType [my qualifyWithRdfNsPrefix \ 434 [[my info class] set content]] 435 } 436 437 RDFContainerNodeClass instproc parseAttributes {name attrList} { 438 #set index 0 439 foreach {n v} $attrList { 440 if {[set an [my isAttribute $n]] ne ""} { 441 my set attributes($n) $v 442 if {$an eq "ID"} { 443 my set subject $v 444 my set ID [[my set parser] set baseURL]\#$v 445 } 446 } 447 #set attrList [my nextAttrNode $an attrList index] 448 } 449 } 450 451 RDFContainerNodeClass instproc giveMemberNr {member} { 452 set pf [my getContentPrefix] 453 if {$pf ne ""} {append pf ":"} 454 $member set memberIndex "${pf}_[my incr memberNr]" 455 } 456 457 RDFContainerNodeClass instproc parseStart {name attrList} { 458 set r [self] 459 next 460 if {[set node [my isNestingNode $name]] ne ""} { 461 set r [my getNestingNode $node $name $attrList] 462 if {[namespace tail [$r info class]] eq "RDFMember"} { 463 my giveMemberNr $r 464 } 465 } else { 466 set r [my getPropertyNodeChild $name $attrList] 467 } 468 return $r 469 } 470 471 ############################################################################## 472 # 473 # Concrete Factory for creating RDF-style nodes 474 # 475 ############################################################################## 476 Class RDFNodeClassFactory -superclass XMLNodeClassFactory 477 RDFNodeClassFactory instproc content content { 478 my set content $content 479 } 480 RDFNodeClassFactory instproc attributeList attributeList { 481 my set attributeList $attributeList 482 } 483 RDFNodeClassFactory instproc nestingTo nestingTo { 484 set name [string trimleft [self] :] 485 foreach cl $nestingTo { 486 $cl set nestingList([my set content]) $name 487 } 488 } 489 490 RDFNodeClassFactory proc create args { 491 # create the class 492 set name [next] 493 switch -exact $name { 494 RDFDescription - RDFProperty - RDFMember { 495 my array set attributeList {} 496 } 497 RDFMember - RDFProperty { 498 my array set nestingList {} 499 } 500 } 501 } 502 ########################################################################## 503 # 504 # now create a factory and build all the node classes 505 # needed for the RDF Parser/Interpreter 506 # 507 ########################################################################## 508 RDFNodeClassFactory proc createFactories {} { 509 foreach {name superclasses content attributeList} { 510 RDFTag RDFNode RDF {} 511 RDFBag RDFContainerNodeClass Bag {ID} 512 RDFSeq RDFContainerNodeClass Seq {ID} 513 RDFAlt RDFContainerNodeClass Alt {ID} 514 RDFProperty RDFNode "" {bagID ID resource parseType} 515 RDFMember RDFProperty li {resource parseType} 516 RDFDescription RDFResource Description {ID bagID about type aboutEach aboutEachPrefix} 517 } { 518 #puts "Create class: $name -superclass $superclasses" 519 RDFNodeClassFactory create $name -superclass $superclasses \ 520 -content $content \ 521 -attributeList $attributeList 522 } 523 } 524 RDFNodeClassFactory createFactories 525 526 # 527 # define nesting constraints 528 # 529 RDFTag nestingTo {} 530 RDFBag nestingTo {RDFTag RDFProperty} 531 RDFSeq nestingTo {RDFTag RDFProperty} 532 RDFAlt nestingTo {RDFTag RDFProperty} 533 RDFMember nestingTo {RDFContainerNodeClass RDFBag RDFSeq RDFAlt} 534 RDFProperty nestingTo {} 535 RDFDescription nestingTo {RDFTag RDFMember RDFProperty} 536 537 ############################################################################## 538 # 539 # add some methods to the property node class 540 # 541 ############################################################################## 542 543 RDFProperty instproc parseAttributes {name attrList} { 544 set r [self] 545 #set index 0 546 foreach {n v} $attrList { 547 if {[my checkForXmlNS $n $v]} {continue} 548 if {[set an [my isAttribute $n]] ne ""} { 549 my set attributes($n) $v 550 if {$an eq "parseType"} {my handleParseType $v} 551 } else { 552 if {![info exists abbrvPropResource]} { 553 set abbrvPropResource \ 554 [my getNestingNode RDFDescription \ 555 [my qualifyWithRdfNsPrefix Description] {}] 556 } 557 $abbrvPropResource propertyAttribute $n $v 558 } 559 #set attrList [my nextAttrNode $an attrList index] 560 } 561 562 if {[info exists abbrvPropResource]} { 563 # if resource attribute is given -> use it for abbr property 564 # description as about attr 565 if {[my exists attributes(resource)]} { 566 set about [my set attributes(resource)] 567 my unset attributes(resource) 568 } 569 if {[my exists attributes([set resource [my qualifyWithRdfNsPrefix resource]])]} { 570 set about [my set attributes($resource)] 571 my unset attributes($resource) 572 } 573 if {[info exists about]} { 574 $abbrvPropResource set attributes(about) $about 575 $abbrvPropResource set subject $about 576 } 577 } 578 } 579 RDFProperty instproc parseStart {name attrList} { 580 if {[my isParseLiteral]} {return [self]} 581 next 582 return [my parseNestedChild $name $attrList] 583 } 584 585 ############################################################################## 586 # 587 # add methods to the member class 588 # 589 ############################################################################## 590 591 RDFMember parameter { 592 memberIndex 593 } 594 595 RDFMember instproc parseAttributes {name attrList} { 596 #set index 0 597 foreach {n v} $attrList { 598 if {[set an [my isAttribute $n]] ne ""} { 599 my set attributes($n) $v 600 if {$an eq "parseType"} {my handleParseType $v} 601 } 602 #set attrList [my nextAttrNode $an attrList index] 603 } 604 } 605 606 RDFMember instproc print {} { 607 return "[next]\nMEMBER-INDEX: [my set memberIndex]" 608 } 609 610 ############################################################################## 611 # 612 # add methods to the description node class 613 # 614 ############################################################################## 615 616 RDFDescription instproc init {args} { 617 next 618 set ID [my makeID] 619 my set subject $ID 620 my set bagID $ID 621 } 622 623 RDFDescription instproc parseAttributes {name attrList} { 624 set r [self] 625 626 # if the parent is a property with an ID -> use it 627 # as description subject 628 set ID [my qualifyWithRdfNsPrefix ID] 629 set parent [my info parent] 630 if {[$parent exists attributes(ID)]} { 631 my set subject [$parent set attributes(ID)] 632 } elseif {[$parent exists attributes($ID)]} { 633 my set subject [$parent set attributes($ID)] 634 } 635 636 foreach {n v} $attrList { 637 if {[my checkForXmlNS $n $v]} {continue} 638 if {[set an [my isAttribute $n]] ne ""} { 639 my set attributes($n) $v 640 switch -exact $an { 641 about - 642 ID - 643 aboutEach - 644 aboutEachPrefix { 645 my set subject $v 646 } 647 bagID { 648 my set bagID [[my set parser] set baseURL]\#$v 649 } 650 type { 651 my appendRDFType $v 652 } 653 } 654 } else { 655 set r [my propertyAttribute $n $v] 656 } 657 } 658 return $r 659 } 660 661 RDFDescription instproc parseStart {name attrList} { 662 next 663 return [my getPropertyNodeChild $name $attrList] 664 } 665 666 ############################################################################## 667 # 668 # add some methods to the <RDF> node class 669 # 670 ############################################################################## 671 672 RDFTag parameter {{startTagOn 0}} 673 674 RDFTag instproc match {c} { 675 # the prefix of the topnode determines initially how the RDF 676 # namespace is named ... since several examples don't have a 677 # namespace definition for this ns, we set here a default, which 678 # may be overridden by ns definitions in the XML text 679 if {[regexp {^([^:]*):(.*)} $c _ pre c]} { 680 my makeIndividualNSEntry $pre [[my set parser] rdfNamespace] 681 #puts stderr "Making RDF namespace entry for <$pre>" 682 } 683 #puts "Match for $c --- Content: [[my info class] set content]" 684 expr {$c eq [[my info class] set content]} 685 } 686 687 RDFTag instproc parseStart {name attrList} { 688 set parsed 0 689 if {[set node [my isNestingNode $name]] ne ""} { 690 set r [my getNestingNode $node $name $attrList] 691 } else { 692 set r [my getTypedNode $name $attrList] 693 } 694 next 695 return $r 696 } 697 698 RDFTag instproc parseEnd content { 699 if {!([my startTagOn] && [my match $content])} { 700 [my errorChild $content] 701 } 702 next 703 self ;# return [self] 704 } 705 706 ############################################################################## 707 # 708 # RDF Factory for creating node objects 709 # 710 ############################################################################## 711 Class RDFNodeFactory -superclass XMLNodeFactory 712 RDFNodeFactory create rdfNodeFactory -sharedNodes {RDFDescription RDFTag} 713 714 715 ############################################################################## 716 # 717 # RDF parser class used to access the xml parser and produce the 718 # rdf node tree 719 # 720 ############################################################################## 721 Class RDFParser -superclass XMLParser -parameter { 722 {baseURL "rdfdoc"} 723 {rdfNamespace "http://www.w3.org/1999/02/22-rdf-syntax-ns#"} 724 } 725 726 RDFParser instproc init args { 727 my mixin add RDFParserParseTypeHandling 728 729 ### this special parser handles rdf:RDF tags 730 my topLevelHandlerPattern {^([^:]*):RDF|RDF} RDFTag 731 732 next 733 my set nodeFactory "rdfNodeFactory" 734 } 735 736 RDFParser instproc makeID {} { 737 my autoname [my baseURL]\#id 738 } 739 740 RDFParser instproc reset {} { 741 next 742 set id [my baseURL]\#id 743 my autoname -reset $id 744 } 745 746 RDFParser instproc createTopLevelNode {name attrList} { 747 set tn [next] 748 #$tn makeIndividualNSEntry xmlns [my set rdfNamespace] 749 ### toplevel node must be of type RDFTag 750 if {![$tn istype RDFTag]} { 751 error "Top level node must be of type RDFTag" 752 } 753 if {[$tn match $name]} { 754 $tn set content $name 755 $tn startTagOn 1 756 757 ### use default values for rdf/default (xmlns) namespace 758 #my makeIndividualNSEntry rdfs "http://www.w3.org/TR/1999/PR-rdf-schema-19990303#" 759 760 foreach {n v} $attrList { 761 if {[$tn checkForXmlNS $n $v]} {continue} 762 } 763 } 764 return $tn 765 } 766 767 #RDFParser instproc parse data { 768 # next 769 #} 770 771 namespace export RDFNodeParseTypeHandling RDFParserParseTypeHandling \ 772 RDFNode RDFResource RDFContainerNodeClass RDFNodeClassFactory \ 773 RDFNodeFactory RDFParser rdfNodeFactory \ 774 RDFTag RDFBag RDFSeq RDFAlt RDFProperty RDFMember RDFDescription 775} 776 777namespace import ::xotcl::rdf::parser::* 778