1#! /bin/sh 2# -*- tcl -*- \ 3exec tclsh "$0" ${1+"$@"} 4 5# Extract and report oscon schedule 6 7package require struct 8package require csv 9package require report 10package require htmlparse 11package require textutil 12package require log 13 14# Restrict logging to levels 'info' and higher. 15log::lvSuppressLE debug 16 17# 1. CSV structure filled by the parser = main data table 18# ---------------------------------------------------- 19# Day Time/Start Time/End Track Tower Room Speaker Title 20# 21# Matrices: "dmain" and "dmainr" 22# 23# Difference: dmainr contains gratituous newlines in the 24# speaker column which make for a better TXT report (less 25# wide). 26# 27# This is also report 'main'. 28# 29# 2. Schedule report to see conflicts, CSV structure 30# ---------------------------------------------- 31# Day Time Location-Columns, one per Room 32# (15min granularity) (Content: Speaker + Topic) 33# 34# Matrices: "sched" and "schedr". Difference as for dmain(r) 35# and the location columns 36# 37# This will be report 'sched'. 38 39proc main {} { 40 global pfx argv 41 42 set pfx [lindex $argv 0] 43 set files [lrange $argv 1 end] 44 45 if {($pfx == {}) || ([llength $files] == 0)} { 46 usage 47 exit -1 48 } 49 50 initialize 51 foreach f $files { 52 log::log info "Scanning \"$f\" ..." 53 parse $f 54 } 55 gen_schedule 56 dump_main 57 dump_schedule 58 postscript 59 return 60} 61 62proc usage {} { 63 global argv0 64 puts "usage: $argv0 prefix file..." 65} 66 67 68proc initialize {} { 69 global rooms tracks 70 ::struct::matrix::matrix dmain ; # data 1 71 ::struct::matrix::matrix dmainr ; # data 1r 72 ::struct::matrix::matrix sched ; # data 2 73 ::struct::matrix::matrix schedr ; # data 2r 74 array set rooms {} 75 array set tracks {} 76 dmain add columns 8 77 dmain add row {Day Start End Track Tower Room Speaker Title} 78 dmainr add columns 8 79 dmainr add row {Day Start End Track Tower Room Speaker Title} 80 return 81} 82 83proc parse {htmlfile} { 84 global rooms tracks 85 86 ::struct::tree::tree t 87 88 log::log info "Reading \"$htmlfile\" ..." 89 set html [read [set fh [open $htmlfile]]] 90 close $fh 91 92 log::log info "Parsing \"$htmlfile\" ..." 93 htmlparse::2tree $html t 94 htmlparse::removeVisualFluff t 95 htmlparse::removeFormDefs t 96 97 log::log info "Extracting information" 98 99 #puts ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 100 # Navigate and extract the information 101 #t walk root -command {print %t %n} 102 #exit 103 104 set base [walk {1 1 0 1 1 0 1 0 1 0}] 105 set day [walkf $base {0 0}] 106 set day [escape [t get $day -key data]] 107 log::log debug "Day = $day" 108 set day [string range $day 0 2] 109 110 # Walk through the sessions of that day. 111 112 set sess [t next $base] 113 while {$sess != {}} { 114 set start [cvtdate [escape [t get [walkf $sess {0 0}] -key data]]] 115 set track [string trim [escape [t get [walkf $sess {1 0}] -key data]]] 116 set loc [escape [t get [walkf $sess {1 1 0}] -key data]] 117 set loc [string trimright $loc "\n\r\t:"] 118 119 log::log debug " $start - $track - $loc" 120 121 # Separate Room/Tower information ... 122 regexp {(.*) in the (.*) Tower} $loc -> room tower 123 set room [string trim $room] 124 set tower [string trim $tower] 125 set rooms($tower/$room) . 126 set tracks($track) . 127 128 set talk [walkf $sess {1 1 3}] 129 while {$talk != {}} { 130 set time [escape [t get $talk -key data]] 131 set talk [t next $talk] 132 set title [escape [t get [walkf $talk {0 0 0}] -key data]] 133 set speaker [escape [t get [walkf $talk {0 2}] -key data]] 134 135 # Now we have everything to fill the main table ... 136 # (After a bit of munging of the strings we got) 137 138 foreach {start end} [split $time -] break 139 set start [cvtdate $start] 140 set end [cvtdate $end] 141 142 regsub -all \r $speaker \n speaker 143 regsub -all \n+ $speaker \n speaker 144 regsub -all " *\n *" $speaker "\n" speaker 145 set speakerc [split $speaker "\n"] 146 set speakerc [join $speakerc ", "] 147 log::log debug " $start - $end - $speakerc - $title" 148 149 #puts >>$speakerc<< 150 #puts >>$speaker<< 151 152 # Day Time/Start Time/End Tower Room Speaker Title 153 dmainr add row [list $day $start $end $track $tower $room $speaker $title] 154 dmain add row [list $day $start $end $track $tower $room $speakerc $title] 155 156 # Forward to next talk 157 catch {set talk [t next $talk]} 158 catch {set talk [t next $talk]} 159 } 160 161 set sess [t next $sess] 162 } 163 164 t destroy 165 return 166} 167 168proc print {t n} { 169 set tp [$t get $n -key type] 170 set d [$t depth $n] 171 set idx "" 172 catch {set idx [$t index $n]} 173 incr d $d 174 incr d $d 175 176 switch -exact -- $tp { 177 a { 178 log::log debug "[textutil::strRepeat " " $d]$idx $tp ([$t get $n -key data]...)" 179 } 180 PCDATA { 181 log::log debug "[textutil::strRepeat " " $d]$idx $tp ([string range [$t get $n -key data] 0 20]...)" 182 } 183 default { 184 log::log debug "[textutil::strRepeat " " $d]$idx $tp" 185 } 186 } 187} 188 189proc walkf {n p} { 190 #log::log info "$n + $p =" 191 foreach idx $p { 192 if {$n == ""} {break} 193 set n [lindex [t children $n] $idx] 194 #log::log info "$idx :- $n" 195 } 196 return $n 197} 198 199proc walk {p} { 200 return [walkf root $p] 201} 202 203proc cvtdate {date} { 204 clock format [clock scan $date] -format "%H:%M" 205} 206 207proc escape {text} { 208 # Special escape for nbsp, convert into space and not the 209 # character specified by the standard. 210 211 regsub -all { } $text { } text 212 htmlparse::mapEscapes $text 213} 214 215 216proc gen_schedule {} { 217 global rooms tracks 218 219 dmain set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmain get rect 0 1 end end]]] 220 dmainr set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmainr get rect 0 1 end end]]] 221 222 sched add columns 2 223 schedr add columns 2 224 #sched add columns [array size rooms] 225 #schedr add columns [array size rooms] 226 sched add columns [array size tracks] 227 schedr add columns [array size tracks] 228 229 #log::log info Tracks=[array size tracks] 230 #log::log info Rooms.=[array size rooms] 231 232 set res [list Day Time] 233 set c 2 234 foreach k [lsort [array names tracks]] { 235 lappend res $k 236 set tracks($k) $c 237 incr c 238 } 239 240 sched add row $res 241 schedr add row $res 242 243 # Data in dmain is already sorted by day. By starting time only 244 # partially, there are back references. 245 # Just move them to the correct rooms and rows! 246 247 #-- Day Time Location-Columns, one per Room -- 248 249 set n [dmain rows] 250 set p 0 251 252 array set rmap {} 253 254 for {set r 1} {$r < $n} {incr r} { 255 foreach {day start end track tower room speaker title} [dmain get row $r] break 256 #[list $day $start $end $tower $room $speakerc $title] 257 258 set key $day,$start 259 if {![info exists rmap($key)]} { 260 log::log info "Track schedule $day $start" 261 sched add row 262 schedr add row 263 incr p 264 265 set rmap($key) $p 266 sched set cell 0 $p $day 267 sched set cell 1 $p $start 268 schedr set cell 0 $p $day 269 schedr set cell 1 $p $start 270 } 271 272 sched set cell $tracks($track) $rmap($key) "$tower; $room; $speaker; $title" 273 schedr set cell $tracks($track) $rmap($key) "$tower $room\n$speaker\n$title" 274 } 275 276 # Squeeze the columns 2+ in the report matrix 277 278 set cols [schedr columns] 279 for {set c 2} {$c < $cols} {incr c} { 280 281 if {[schedr columnwidth $c] > 21} { 282 log::log debug "Squeezing $c" 283 set col [schedr get column $c] 284 set res [list] 285 foreach item $col { 286 lappend res [wrap $item 21] 287 } 288 schedr set column $c $res 289 } 290 } 291 292 # Now sort by day (primary key) and starting time (secondary key). 293 # (Meaning we have to sort by time first, and then the day) 294 295 # sched setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [sched getrect 0 0 end end]]] 296 # schedr setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [schedr getrect 0 0 end end]]] 297 298 return 299} 300 301proc dump_main {} { 302 global pfx 303 log::log info "Writing talk information /CSV" 304 305 set f [open ${pfx}.main.csv w] 306 csv::writematrix dmain $f 307 close $f 308 309 log::log info "Writing talk information /TXT" 310 311 # Compute width of report and squeeze the title column to fit 312 # below 80 char/line 313 314 # Day Time/Start Time/End Track Tower Room Speaker Title 315 316 set total 0 317 incr total [dmain columnwidth 0] 318 incr total [dmain columnwidth 1] 319 incr total [dmain columnwidth 2] 320 incr total [dmain columnwidth 3] 321 incr total [dmain columnwidth 4] 322 incr total [dmain columnwidth 5] 323 incr total [dmain columnwidth 6] 324 325 #log::log info Total=$total 326 327 if {$total < 80} { 328 set total [expr {80 - $total}] 329 set titles [dmain getcolumn 7] 330 set res [list] 331 foreach t $titles { 332 lappend res [textutil::adjust $t -length $total] 333 } 334 dmain setcolumn 7 $res 335 } 336 337 ::report::report r [dmainr columns] style captionedtable 1 338 set f [open ${pfx}.main.txt w] 339 r printmatrix2channel dmainr $f 340 close $f 341 r destroy 342 343 # Now the HTML report, use 'dmain' as base, actually formatting 344 # into lines is done by the browser. 345 346 log::log info "Writing talk information /HTML" 347 348 ::report::report r [dmain columns] style html 349 350 set f [open ${pfx}.main.html w] 351 puts $f "<html><head><title>Talk information and schedule</title></head><body>" 352 puts $f "<h1>Talk information and schedule</h1>" 353 puts $f "<p><table border=1>" 354 r printmatrix2channel dmain $f 355 puts $f "</table></p></body></html>" 356 close $f 357 r destroy 358} 359 360proc dump_schedule {} { 361 global pfx 362 log::log info "Writing track schedule /CSV" 363 364 set f [open ${pfx}.sched.csv w] 365 csv::writematrix sched $f 366 close $f 367 368 log::log info "Writing track schedule /TXT" 369 370 ::report::report r [schedr columns] style captionedtable 1 371 r datasep set [r top get] 372 r datasep enable 373 374 set f [open ${pfx}.sched.txt w] 375 r printmatrix2channel schedr $f 376 close $f 377 r destroy 378 379 # Now the HTML report, use 'sched' as base, actually formatting 380 # into lines is done by the browser. 381 382 log::log info "Writing track schedule /HTML" 383 384 ::report::report r [sched columns] style html 385 386 set f [open ${pfx}.sched.html w] 387 puts $f "<html><head><title>Track schedules</title></head><body>" 388 puts $f "<h1>Track schedules</h1>" 389 puts $f "<p><table border=1>" 390 r printmatrix2channel sched $f 391 puts $f "</table></p></body></html>" 392 close $f 393 r destroy 394} 395 396proc postscript {} { 397 global pfx 398 # Transforms texts into printable postscript, using a2ps (if available) 399 400 catch {exec a2ps -o ${pfx}.main.ps -1 -B -r -f7 ${pfx}.main.txt} 401 catch {exec a2ps -o ${pfx}.sched.ps -1 -B -r -f4 ${pfx}.sched.txt} 402 return 403} 404 405proc wrap {text len} { 406 # @author Jeffrey Hobbs <jeff at hobbs org> 407 # 408 # @c Wraps the given <a text> into multiple lines not 409 # @c exceeding <a len> characters each. Lines shorter 410 # @c than <a len> characters might get filled up. 411 # 412 # @a text: The string to operate on. 413 # @a len: The maximum allowed length of a single line. 414 # 415 # @r Basically <a text>, but with changed newlines to 416 # @r restrict the length of individual lines to at most 417 # @r <a len> characters. 418 419 # @n This procedure is not checked by the testsuite. 420 421 # @i wrap, word wrap 422 423 # Convert all newlines into spaces and initialize the result 424 # see ::pool::string::oneLine too. 425 426 regsub -all "\n" $text { } text 427 incr len -1 428 429 set out {} 430 431 # As long as the string is longer than the intended length of 432 # lines in the result: 433 434 while {[string len $text] > $len} { 435 # - Find position of last space in the part of the text 436 # which could a line in the result. 437 438 # - We jump out of the loop if there is none and the whole 439 # text does not contain spaces anymore. In the latter case 440 # the rest of the text is one word longer than an intended 441 # line, we cannot avoid the longer line. 442 443 set i [string last { } [string range $text 0 $len]] 444 445 if {$i == -1 && [set i [string first { } $text]] == -1} { 446 break 447 } 448 449 # Get the just fitting part of the text, remove any heading 450 # and trailing spaces, then append it to the result string, 451 # don't close it with a newline! 452 453 append out [string trim [string range $text 0 [incr i -1]]]\n 454 455 # Shorten the text by the length of the processed part and 456 # the space used to split it, then iterate. 457 458 set text [string range $text [incr i 2] end] 459 } 460 461 return $out$text 462} 463 464# ------------------------------------------- 465# Define the required reports styles 466 467::report::defstyle simpletable {} { 468 data set [split "[string repeat "| " [columns]]|"] 469 top set [split "[string repeat "+ - " [columns]]+"] 470 bottom set [top get] 471 top enable 472 bottom enable 473} 474::report::defstyle captionedtable {{n 1}} { 475 simpletable 476 topdata set [data get] 477 topcapsep set [top get] 478 topcapsep enable 479 tcaption $n 480} 481::report::defstyle html {} { 482 set c [columns] 483 set cl $c ; incr cl -1 484 data set "<tr> [split [string repeat " " $cl] ""] </tr>" 485 for {set col 0} {$col < $c} {incr col} { 486 pad $col left "<td>" 487 pad $col right "</td>" 488 } 489 return 490} 491 492# ------------------------------------------- 493 494main 495exit 496