#!/usr/bin/perl -w

use strict;
use warnings;

use XML::LibXML;
use IO::File;

# ------------------------------------------------------------------------- #
# (c) 2003 christian p. glahn
# ------------------------------------------------------------------------- #

# ------------------------------------------------------------------------- #
# This is an example of how to use the DOM interface of XML::LibXML. The
# script reads an XML file with a module specification. If the module
# contains several classes, the script fetches them and stores the
# data into different POD files.
#
# Note this is just an example, to demonstrate how XML::LibXML works.
# The code works for the XML::LibXML documentation, but may not work
# for any other docbook file.
#
# If you are interested what the results are, check the README and the POD
# files shipped with XML::LibXML.
# ------------------------------------------------------------------------- #

# ------------------------------------------------------------------------- #
# SYNOPSIS:
# xmllibxmldocs.pl $docbook_file $targetdir
#
my $srcfile   = shift @ARGV;
my $targetdir = shift @ARGV;

# Both arguments are required. Without this check a missing target
# directory would silently turn into "/" below.
unless ( defined $srcfile and defined $targetdir ) {
    die "usage: $0 docbook_file targetdir\n";
}

# The target directory is used as a plain string prefix, so it has to end
# with a slash.
unless ( $targetdir =~ /\/$/ ) {
    $targetdir .= "/";
}

# ------------------------------------------------------------------------- #
# init the parser
my $parser = XML::LibXML->new();
$parser->load_ext_dtd(0);

# ------------------------------------------------------------------------- #
# load the document into memory.
my $doc = $parser->parse_file( $srcfile );

# ------------------------------------------------------------------------- #
# good implementations would use XSLT to convert a docbook to any other
# text format. Since the module does not presume libxslt installed, we
# have to do the dirty job.
my $ch = ChapterHandler->new($targetdir);

# ------------------------------------------------------------------------- #
# init the common parts in all pods
my ( $bookinfo ) = $doc->findnodes( "//bookinfo" );
$ch->set_general_info( $bookinfo );

# ------------------------------------------------------------------------- #
# then process each chapter of the XML::LibXML book
foreach my $chap ( $doc->findnodes( "//chapter" ) ) {
    $ch->handle( $chap );
}

# ------------------------------------------------------------------------- #
# the class to process our docbook file
# ------------------------------------------------------------------------- #
package ChapterHandler;

# ------------------------------------------------------------------------- #
# the constructor
# ------------------------------------------------------------------------- #
# Takes the output directory (used as a string prefix for file names) and
# returns a new handler. The info block starts out empty so that POD files
# can be written even if set_general_info() never finds a bookinfo node.
sub new {
    my $class = shift;
    my $dir   = shift;

    my $self = bless { directory => $dir, infoblock => "" }, $class;

    return $self;
}

# ------------------------------------------------------------------------- #
# private helpers
# ------------------------------------------------------------------------- #

# Strips leading and trailing whitespace from a string and returns the result.
sub _trim {
    my $str = shift;
    $str =~ s/^\s*|\s*$//g;
    return $str;
}

# Returns the string value of the first child element named $tag of $node.
# Dies with a message naming both elements if there is no such child.
sub _child_text {
    my ( $self, $node, $tag ) = @_;

    my ( $child ) = $node->getChildrenByTagName( $tag );
    unless ( defined $child ) {
        die "<" . $node->nodeName() . "> element has no <$tag> child\n";
    }

    return $child->string_value();
}

# Writes the text of a para node to the output file, broken into lines
# shorter than 80 characters and followed by an empty line.
sub _print_para {
    my ( $self, $node ) = @_;

    my $line = "";
    foreach my $word ( split /\s+/, _trim( $node->string_value() ) ) {
        if ( ( length( $line ) + length( $word ) + 1 ) < 80 ) {
            $line .= " " if length $line;
            $line .= $word;
        }
        else {
            # flush the finished line; nothing is pending if the very
            # first word is already too long, so no empty line is written
            $self->{OFILE}->print( $line . "\n" ) if length $line;
            $line = $word;
        }
    }

    $self->{OFILE}->print( $line );
    $self->{OFILE}->print( "\n\n" );
}

# Writes a plain text heading: $prefix, the title (upper-cased if $upcase is
# true), and a row of "=" as long as the title as it appears in the document.
sub _print_text_heading {
    my ( $self, $title, $prefix, $upcase ) = @_;

    my $shown = $upcase ? uc( $title ) : $title;

    $self->{OFILE}->print( $prefix . $shown . "\n" );
    $self->{OFILE}->print( "=" x length( $title ) );
    $self->{OFILE}->print( "\n\n" );
}

# ------------------------------------------------------------------------- #
# set_general_info
# ------------------------------------------------------------------------- #
# processes the bookinfo tag of XML::LibXML to extract common information such
# as version or copyright information. The result is stored in
# $self->{infoblock} and appended to every POD file by handle(). Does nothing
# if no bookinfo node is passed.
sub set_general_info {
    my $self     = shift;
    my $infonode = shift;
    return unless defined $infonode;

    my $infostr = "=head1 AUTHORS\n\n";

    # Collect all names first and join them afterwards. Deciding on the
    # separator by looking at nextSibling() does not work, because the node
    # after the last author is usually a whitespace text node.
    my @names;
    foreach my $author ( $infonode->findnodes( "authorgroup/author" ) ) {
        my ( $node_fn ) = $author->getChildrenByTagName( "firstname" );
        my ( $node_sn ) = $author->getChildrenByTagName( "surname" );
        push @names, join " ",
            map { $_->string_value() } grep { defined } $node_fn, $node_sn;
    }
    if ( scalar @names ) {
        $infostr .= join( ", \n", @names ) . "\n\n";
    }

    my ( $version ) = $infonode->findnodes( "edition" );
    if ( defined $version ) {
        $infostr .= "\n=head1 VERSION\n\n" . $version->string_value() . "\n\n";
    }

    my ( $copyright ) = $infonode->findnodes( "copyright" );
    if ( defined $copyright ) {
        $infostr .= "=head1 COPYRIGHT\n\n";

        # list context is required here: in scalar context
        # getChildrenByTagName() returns a node list object, which is
        # defined even if it is empty.
        my ( $node_y ) = $copyright->getChildrenByTagName( "year" );
        my ( $node_h ) = $copyright->getChildrenByTagName( "holder" );
        if ( defined $node_y ) {
            $infostr .= $node_y->string_value() . ", ";
        }
        if ( defined $node_h ) {
            $infostr .= $node_h->string_value();
        }
        $infostr .= ", All rights reserved.\n\n=cut\n";
    }

    return $self->{infoblock} = $infostr;
}

# ------------------------------------------------------------------------- #
# handle
# ------------------------------------------------------------------------- #
# This function opens the output file and decides how the chapter is
# processed. The file name is derived from the titleabbrev of the chapter;
# chapters without a titleabbrev are skipped. README, LICENSE and the main
# LibXML.pod are written without the directory prefix, everything else goes
# below the directory given to the constructor. If the output file cannot be
# opened, a warning is issued and the chapter is skipped.
sub handle {
    my $self    = shift;
    my $chapter = shift;

    my ( $abbr ) = $chapter->findnodes( "titleabbrev" );
    return unless defined $abbr;

    # create a new file.
    my $abbrev   = $abbr->string_value();
    my $filename = _trim( $abbrev );
    my $dir      = $self->{directory};

    $filename =~ s/XML\:\:LibXML//g;
    $filename =~ s/^-|^\:\://g;   # remove the first colon or minus.
    $filename =~ s/\:\:/\//g;     # transform remaining colons to paths.
    # the previous statement should work for existing modules. This could be
    # dangerous for nested modules, which do not exist at the time of writing
    # this code.

    unless ( length $filename ) {
        $dir      = "";
        $filename = "LibXML";
    }

    if ( $filename ne "README" and $filename ne "LICENSE" ) {
        $filename .= ".pod";
    }
    else {
        $dir = "";
    }

    my $path = $dir . $filename;
    my $out  = IO::File->new();
    unless ( $out->open( $path, "w" ) ) {
        warn "cannot open '$path' for writing: $!\n";
        return;
    }
    $self->{OFILE} = $out;

    # note that this test looks at the unmodified abbreviation, not at the
    # file name derived from it.
    if ( $abbrev eq "README" or $abbrev eq "LICENSE" ) {
        # Text only chapters in the documentation
        $self->dump_text( $chapter );
    }
    else {
        # print header
        # print synopsis
        # process the information itself
        # dump the info block
        $self->dump_pod( $chapter );
        $out->print( $self->{infoblock} );
    }

    # close the file; buffered write errors show up here
    $out->close()
        or warn "cannot close '$path': $!\n";
}

# ------------------------------------------------------------------------- #
# dump_text
# ------------------------------------------------------------------------- #
# convert the chapter into a textfile, such as README. The function prints
# the chapter title and then walks through the child nodes; sections are
# processed recursively. Child nodes other than para, sect1, sect2,
# itemizedlist, orderedlist and programlisting are ignored.
sub dump_text {
    my $self = shift;
    my $chap = shift;

    my $out = $self->{OFILE};

    if ( $chap->nodeName() eq "chapter" ) {
        $self->_print_text_heading( $self->_child_text( $chap, "title" ), "", 1 );
    }

    foreach my $node ( $chap->childNodes() ) {
        my $name = $node->nodeName();

        if ( $name eq "para" ) {
            # we split at the last whitespace before 80 chars
            $self->_print_para( $node );
        }
        elsif ( $name eq "sect1" ) {
            $self->_print_text_heading( $self->_child_text( $node, "title" ), "\n", 1 );
            $self->dump_text( $node );
        }
        elsif ( $name eq "sect2" ) {
            $self->_print_text_heading( $self->_child_text( $node, "title" ), "\n", 0 );
            $self->dump_text( $node );
        }
        elsif ( $name eq "itemizedlist" ) {
            my $sp = " ";
            foreach my $item ( $node->findnodes( "listitem" ) ) {
                $out->print( "$sp o " );
                $out->print( _trim( $item->string_value() ) );
                $out->print( "\n" );
            }
            $out->print( "\n" );
        }
        elsif ( $name eq "orderedlist" ) {
            my $i  = 0;
            my $sp = " ";
            foreach my $item ( $node->findnodes( "listitem" ) ) {
                $i++;
                $out->print( "$sp $i " );
                $out->print( _trim( $item->string_value() ) );
                $out->print( "\n" );
            }
            $out->print( "\n" );
        }
        elsif ( $name eq "programlisting" ) {
            # quote every line of the listing
            my $str = $node->string_value();
            $str =~ s/\n/\n> /g;
            $out->print( "> " . $str );
            $out->print( "\n\n" );
        }
    }
}

# ------------------------------------------------------------------------- #
# dump_pod
# ------------------------------------------------------------------------- #
# This method is used to create the real POD files for XML::LibXML. It is not
# too sophisticated, but it already does quite a good job.
#
# For a chapter node the NAME and SYNOPSIS sections are written first. The
# synopsis is built from the sect1 titled 'Synopsis' and from all
# funcsynopsis elements found anywhere in the chapter. After that the child
# nodes are processed in document order; sections, list items and variable
# lists are processed recursively.
sub dump_pod {
    my $self = shift;
    my $chap = shift;

    my $out        = $self->{OFILE};
    my $in_chapter = ( $chap->nodeName() eq "chapter" );

    if ( $in_chapter ) {
        my $title = $self->_child_text( $chap, "title" );
        my $str   = $self->_child_text( $chap, "titleabbrev" ) . " - " . $title;
        $out->print( "=head1 NAME\n\n$str\n\n" );

        my ( $synopsis ) = $chap->findnodes( "sect1[title='Synopsis']" );
        my @funcs        = $chap->findnodes( ".//funcsynopsis" );

        if ( $synopsis or scalar @funcs ) {
            $out->print( "=head1 SYNOPSIS\n\n" );
        }
        if ( $synopsis ) {
            $self->dump_pod( $synopsis );
        }
        if ( scalar @funcs ) {
            foreach my $s ( @funcs ) {
                $self->dump_pod( $s );
            }
            $out->print( "\n\n=head1 DESCRIPTION\n\n" );
        }
    }

    foreach my $node ( $chap->childNodes() ) {
        my $name = $node->nodeName();

        if ( $name eq "para" ) {
            # we split at the last whitespace before 80 chars
            $self->_print_para( $node );
        }
        elsif ( $name eq "sect1" ) {
            my $str = $self->_child_text( $node, "title" );

            # the synopsis section of a chapter has already been written
            # as part of the chapter header
            unless ( $in_chapter and $str eq 'Synopsis' ) {
                $out->print( "\n=head1 " . uc( $str ) );
                $out->print( "\n\n" );
                $self->dump_pod( $node );
            }
        }
        elsif ( $name eq "sect2" ) {
            my $str = $self->_child_text( $node, "title" );
            $out->print( "\n=head2 " . $str . "\n\n" );
            $self->dump_pod( $node );
        }
        elsif ( $name eq "itemizedlist" ) {
            $out->print( "\n=over 4\n\n" );
            foreach my $item ( $node->findnodes( "listitem" ) ) {
                $out->print( "=item *\n\n" );
                $self->dump_pod( $item );
                $out->print( "\n\n" );
            }
            $out->print( "=back\n\n" );
        }
        elsif ( $name eq "orderedlist" ) {
            my $i = 0;

            $out->print( "=over 4\n\n" );
            foreach my $item ( $node->findnodes( "listitem" ) ) {
                $i++;
                $out->print( "=item $i " );
                $out->print( _trim( $item->string_value() ) );
                $out->print( "\n\n" );
            }
            $out->print( "=back\n\n" );
        }
        elsif ( $name eq "variablelist" ) {
            # the entries are written by the varlistentry branch below
            $out->print( "=over 4\n\n" );
            $self->dump_pod( $node );
            $out->print( "\n=back\n\n" );
        }
        elsif ( $name eq "varlistentry" ) {
            my ( $term ) = $node->findnodes( "term" );

            $out->print( "=item " );
            if ( defined $term ) {
                $out->print( "B<" . $term->string_value() . ">" );
            }
            $out->print( "\n\n" );

            foreach my $it ( $node->findnodes( "listitem" ) ) {
                $self->dump_pod( $it );
            }
            $out->print( "\n" );
        }
        elsif ( $name eq "programlisting" ) {
            # indent every line, so POD treats the listing as verbatim text
            my $str = $node->string_value();
            $str =~ s/\n/\n /g;
            $out->print( " " . $str );
            $out->print( "\n\n" );
        }
        elsif ( $name eq "funcsynopsis" ) {
            $self->dump_pod( $node );
            $out->print( "\n" );
        }
        elsif ( $name eq "funcsynopsisinfo" ) {
            my $str = $node->string_value();
            $str =~ s/\n/\n /g;
            $out->print( " $str\n" );
        }
    }
}

1;