1#! /bin/sh
2# -*- tcl -*- \
3exec tclsh "$0" ${1+"$@"}
4
5# Extract and report oscon schedule
6
7package require struct
8package require csv
9package require report
10package require htmlparse
11package require textutil
12package require log
13
14# Restrict logging to levels 'info' and higher.
15log::lvSuppressLE debug
16
17# 1. CSV structure filled by the parser = main data table
18#    ----------------------------------------------------
19#    Day Time/Start Time/End Track Tower Room Speaker Title
20#
21#    Matrices: "dmain" and "dmainr"
22#
23#    Difference: dmainr contains gratituous newlines in the
24#    speaker column which make for a better TXT report (less
25#    wide).
26#
27#    This is also report 'main'.
28#
29# 2. Schedule report to see conflicts, CSV structure
30#    ----------------------------------------------
31#    Day Time                Location-Columns, one per Room
32#        (15min granularity) (Content: Speaker + Topic)
33#
34#    Matrices: "sched" and "schedr". Difference as for dmain(r)
35#	and the location columns
36#
37#    This will be report 'sched'.
38
39proc main {} {
40    global pfx argv
41
42    set pfx   [lindex $argv 0]
43    set files [lrange $argv 1 end]
44
45    if {($pfx == {}) || ([llength $files] == 0)} {
46	usage
47	exit -1
48    }
49
50    initialize
51    foreach f $files {
52	log::log info "Scanning \"$f\" ..."
53	parse $f
54    }
55    gen_schedule
56    dump_main
57    dump_schedule
58    postscript
59    return
60}
61
62proc usage {} {
63    global argv0
64    puts "usage: $argv0 prefix file..."
65}
66
67
68proc initialize {} {
69    global rooms tracks
70    ::struct::matrix::matrix dmain  ; # data 1
71    ::struct::matrix::matrix dmainr ; # data 1r
72    ::struct::matrix::matrix sched  ; # data 2
73    ::struct::matrix::matrix schedr ; # data 2r
74    array set rooms  {}
75    array set tracks {}
76    dmain  add columns 8
77    dmain  add row {Day Start End Track Tower Room Speaker Title}
78    dmainr add columns 8
79    dmainr add row {Day Start End Track Tower Room Speaker Title}
80    return
81}
82
83proc parse {htmlfile} {
84    global rooms tracks
85
86    ::struct::tree::tree t
87
88    log::log info "Reading \"$htmlfile\" ..."
89    set html [read [set fh [open $htmlfile]]]
90    close $fh
91
92    log::log info "Parsing \"$htmlfile\" ..."
93    htmlparse::2tree $html t
94    htmlparse::removeVisualFluff t
95    htmlparse::removeFormDefs t
96
97    log::log info "Extracting information"
98
99    #puts ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
100    # Navigate and extract the information
101    #t walk root -command {print %t %n}
102    #exit
103
104    set base [walk {1 1 0 1 1 0 1 0 1 0}]
105    set day  [walkf $base {0 0}]
106    set day  [escape [t get $day -key data]]
107    log::log debug "Day = $day"
108    set day [string range $day 0 2]
109
110    # Walk through the sessions of that day.
111
112    set sess [t next $base]
113    while {$sess != {}} {
114	set start [cvtdate [escape [t get [walkf $sess {0 0}] -key data]]]
115	set track [string trim [escape [t get [walkf $sess {1 0}] -key data]]]
116	set loc   [escape [t get [walkf $sess {1 1 0}] -key data]]
117	set loc   [string trimright $loc "\n\r\t:"]
118
119	log::log debug "    $start - $track - $loc"
120
121	# Separate Room/Tower information ...
122	regexp {(.*) in the (.*) Tower} $loc -> room tower
123	set room  [string trim $room]
124	set tower [string trim $tower]
125	set rooms($tower/$room) .
126	set tracks($track) .
127
128	set talk [walkf $sess {1 1 3}]
129	while {$talk != {}} {
130	    set time    [escape [t get $talk -key data]]
131	    set talk    [t next $talk]
132	    set title   [escape [t get [walkf $talk {0 0 0}] -key data]]
133	    set speaker [escape [t get [walkf $talk {0 2}]   -key data]]
134
135	    # Now we have everything to fill the main table ...
136	    # (After a bit of munging of the strings we got)
137
138	    foreach {start end} [split $time -] break
139	    set start [cvtdate $start]
140	    set end   [cvtdate $end]
141
142	    regsub -all \r  $speaker \n speaker
143	    regsub -all \n+ $speaker \n speaker
144	    regsub -all " *\n *" $speaker "\n" speaker
145	    set speakerc [split $speaker "\n"]
146	    set speakerc [join $speakerc ", "]
147	    log::log debug "        $start - $end - $speakerc - $title"
148
149	    #puts >>$speakerc<<
150	    #puts >>$speaker<<
151
152	    #                Day Time/Start Time/End Tower Room Speaker Title
153	    dmainr add row [list $day $start $end $track $tower $room $speaker  $title]
154	    dmain  add row [list $day $start $end $track $tower $room $speakerc $title]
155
156	    # Forward to next talk
157	    catch {set talk [t next $talk]}
158	    catch {set talk [t next $talk]}
159	}
160
161	set sess [t next $sess]
162    }
163
164    t destroy
165    return
166}
167
168proc print {t n} {
169    set  tp  [$t get $n -key type]
170    set  d   [$t depth $n]
171    set idx ""
172    catch {set  idx [$t index $n]}
173    incr d  $d
174    incr d  $d
175
176    switch -exact -- $tp {
177        a {
178            log::log debug "[textutil::strRepeat " " $d]$idx $tp ([$t get $n -key data]...)"
179        }
180        PCDATA {
181            log::log debug "[textutil::strRepeat " " $d]$idx $tp ([string range [$t get $n -key data] 0 20]...)"
182        }
183        default {
184            log::log debug "[textutil::strRepeat " " $d]$idx $tp"
185        }
186    }
187}
188
189proc walkf {n p} {
190    #log::log info "$n + $p ="
191    foreach idx $p {
192        if {$n == ""} {break}
193        set n [lindex [t children $n] $idx]
194        #log::log info "$idx :- $n"
195    }
196    return $n
197}
198
199proc walk {p} {
200    return [walkf root $p]
201}
202
203proc cvtdate {date} {
204    clock format [clock scan $date] -format "%H:%M"
205}
206
207proc escape {text} {
208    # Special escape for nbsp, convert into space and not the
209    # character specified by the standard.
210
211    regsub -all {&nbsp;} $text { } text
212    htmlparse::mapEscapes $text
213}
214
215
216proc gen_schedule {} {
217    global rooms tracks
218
219    dmain  set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmain  get rect 0 1 end end]]]
220    dmainr set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmainr get rect 0 1 end end]]]
221
222    sched  add columns 2
223    schedr add columns 2
224    #sched  add columns [array size rooms]
225    #schedr add columns [array size rooms]
226    sched  add columns [array size tracks]
227    schedr add columns [array size tracks]
228
229    #log::log info Tracks=[array size tracks]
230    #log::log info Rooms.=[array size rooms]
231
232    set res [list Day Time]
233    set c 2
234    foreach k [lsort [array names tracks]] {
235	lappend res $k
236	set tracks($k) $c
237	incr c
238    }
239
240    sched  add row $res
241    schedr add row $res
242
243    # Data in dmain is already sorted by day. By starting time only
244    # partially, there are back references.
245    # Just move them to the correct rooms and rows!
246
247    #-- Day Time Location-Columns, one per Room --
248
249    set n [dmain rows]
250    set p 0
251
252    array set rmap {}
253
254    for {set r 1} {$r < $n} {incr r} {
255	foreach {day start end track tower room speaker title} [dmain get row $r] break
256	#[list $day $start $end $tower $room $speakerc $title]
257
258	set key $day,$start
259	if {![info exists rmap($key)]} {
260	    log::log info "Track schedule $day $start"
261	    sched  add row
262	    schedr add row
263	    incr p
264
265	    set rmap($key) $p
266	    sched  set cell 0 $p $day
267	    sched  set cell 1 $p $start
268	    schedr set cell 0 $p $day
269	    schedr set cell 1 $p $start
270	}
271
272	sched  set cell $tracks($track) $rmap($key) "$tower; $room; $speaker; $title"
273	schedr set cell $tracks($track) $rmap($key) "$tower $room\n$speaker\n$title"
274    }
275
276    # Squeeze the columns 2+ in the report matrix
277
278    set cols [schedr columns]
279    for {set c 2} {$c < $cols} {incr c} {
280
281	if {[schedr columnwidth $c] > 21} {
282	    log::log debug "Squeezing $c"
283	    set col [schedr get column $c]
284	    set res [list]
285	    foreach item $col {
286		lappend res [wrap $item 21]
287	    }
288	    schedr set column $c $res
289	}
290    }
291
292    # Now sort by day (primary key) and starting time (secondary key).
293    # (Meaning we have to sort by time first, and then the day)
294
295    # sched  setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [sched  getrect 0 0 end end]]]
296    # schedr setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [schedr getrect 0 0 end end]]]
297
298    return
299}
300
301proc dump_main {} {
302    global pfx
303    log::log info "Writing talk information /CSV"
304
305    set f [open ${pfx}.main.csv w]
306    csv::writematrix dmain $f
307    close $f
308
309    log::log info "Writing talk information /TXT"
310
311    # Compute width of report and squeeze the title column to fit
312    # below 80 char/line
313
314    # Day Time/Start Time/End Track Tower Room Speaker Title
315
316    set total 0
317    incr total [dmain columnwidth 0]
318    incr total [dmain columnwidth 1]
319    incr total [dmain columnwidth 2]
320    incr total [dmain columnwidth 3]
321    incr total [dmain columnwidth 4]
322    incr total [dmain columnwidth 5]
323    incr total [dmain columnwidth 6]
324
325    #log::log info Total=$total
326
327    if {$total < 80} {
328	set total [expr {80 - $total}]
329	set titles [dmain getcolumn 7]
330	set res [list]
331	foreach t $titles {
332	    lappend res [textutil::adjust $t -length $total]
333	}
334	dmain setcolumn 7 $res
335    }
336
337    ::report::report r [dmainr columns] style captionedtable 1
338    set f [open ${pfx}.main.txt w]
339    r printmatrix2channel dmainr $f
340    close $f
341    r destroy
342
343    # Now the HTML report, use 'dmain' as base, actually formatting
344    # into lines is done by the browser.
345
346    log::log info "Writing talk information /HTML"
347
348    ::report::report r [dmain columns] style html
349
350    set f [open ${pfx}.main.html w]
351    puts $f "<html><head><title>Talk information and schedule</title></head><body>"
352    puts $f "<h1>Talk information and schedule</h1>"
353    puts $f "<p><table border=1>"
354    r printmatrix2channel dmain $f
355    puts $f "</table></p></body></html>"
356    close $f
357    r destroy
358}
359
360proc dump_schedule {} {
361    global pfx
362    log::log info "Writing track schedule /CSV"
363
364    set f [open ${pfx}.sched.csv w]
365    csv::writematrix sched $f
366    close $f
367
368    log::log info "Writing track schedule /TXT"
369
370    ::report::report r [schedr columns] style captionedtable 1
371    r datasep set [r top get]
372    r datasep enable
373
374    set f [open ${pfx}.sched.txt w]
375    r printmatrix2channel schedr $f
376    close $f
377    r destroy
378
379    # Now the HTML report, use 'sched' as base, actually formatting
380    # into lines is done by the browser.
381
382    log::log info "Writing track schedule /HTML"
383
384    ::report::report r [sched columns] style html
385
386    set f [open ${pfx}.sched.html w]
387    puts $f "<html><head><title>Track schedules</title></head><body>"
388    puts $f "<h1>Track schedules</h1>"
389    puts $f "<p><table border=1>"
390    r printmatrix2channel sched $f
391    puts $f "</table></p></body></html>"
392    close $f
393    r destroy
394}
395
396proc postscript {} {
397    global pfx
398    # Transforms texts into printable postscript, using a2ps (if available)
399
400    catch {exec a2ps -o ${pfx}.main.ps  -1 -B -r -f7 ${pfx}.main.txt}
401    catch {exec a2ps -o ${pfx}.sched.ps -1 -B -r -f4 ${pfx}.sched.txt}
402    return
403}
404
405proc wrap {text len} {
406    # @author Jeffrey Hobbs <jeff at hobbs org>
407    #
408    # @c Wraps the given <a text> into multiple lines not
409    # @c exceeding <a len> characters each. Lines shorter
410    # @c than <a len> characters might get filled up.
411    #
412    # @a text: The string to operate on.
413    # @a len: The maximum allowed length of a single line.
414    #
415    # @r Basically <a text>, but with changed newlines to
416    # @r restrict the length of individual lines to at most
417    # @r <a len> characters.
418
419    # @n This procedure is not checked by the testsuite.
420
421    # @i wrap, word wrap
422
423    # Convert all newlines into spaces and initialize the result
424    # see ::pool::string::oneLine too.
425
426    regsub -all "\n" $text { } text
427    incr len -1
428
429    set out {}
430
431    # As long as the string is longer than the intended length of
432    # lines in the result:
433
434    while {[string len $text] > $len} {
435	# - Find position of last space in the part of the text
436	#   which could a line in the result.
437
438	# - We jump out of the loop if there is none and the whole
439	#   text does not contain spaces anymore. In the latter case
440	#   the rest of the text is one word longer than an intended
441	#   line, we cannot avoid the longer line.
442
443	set i [string last { } [string range $text 0 $len]]
444
445	if {$i == -1 && [set i [string first { } $text]] == -1} {
446	    break
447	}
448
449	# Get the just fitting part of the text, remove any heading
450	# and trailing spaces, then append it to the result string,
451	# don't close it with a newline!
452
453	append out [string trim [string range $text 0 [incr i -1]]]\n
454
455	# Shorten the text by the length of the processed part and
456	# the space used to split it, then iterate.
457
458	set text [string range $text [incr i 2] end]
459    }
460
461    return $out$text
462}
463
464# -------------------------------------------
465# Define the required reports styles
466
467::report::defstyle simpletable {} {
468    data   set [split "[string repeat "| "   [columns]]|"]
469    top    set [split "[string repeat "+ - " [columns]]+"]
470    bottom set [top get]
471    top	   enable
472    bottom enable
473}
474::report::defstyle captionedtable {{n 1}} {
475    simpletable
476    topdata   set [data get]
477    topcapsep set [top  get]
478    topcapsep enable
479    tcaption $n
480}
481::report::defstyle html {} {
482    set c  [columns]
483    set cl $c ; incr cl -1
484    data set "<tr> [split [string repeat " " $cl] ""] </tr>"
485    for {set col 0} {$col < $c} {incr col} {
486	pad $col left  "<td>"
487	pad $col right "</td>"
488    }
489    return
490}
491
492# -------------------------------------------
493
494main
495exit
496