1#! /bin/sh
2# the next line restarts with tclsh \
3exec tclsh "$0" ${1+"$@"}
4
5# webviewer.tcl - Copyright (C) 2004 Pat Thoyts <patthoyts@users.sourceforge.net>
6#
7# This is a sample application to demonstrate the use of the htmlparse package.
8#
9# Given the URL of a web page, this application will display just the text of
10# the page - that is the contents of header, paragraph and pre tags.
11#
12# As an aside, this also illustrates the use of the autoproxy package to
13# cope with http proxy servers (if present) and handles HTTP redirections and
14# so on.
15#
16# Usage: webviewer.tcl http://tip.tcl.tk/2
17#
18# $Id: webviewer.tcl,v 1.2 2009/01/30 04:18:14 andreas_kupries Exp $
19
20package require htmlparse;              # tcllib
21package require http;                   # tcl
22package require autoproxy;              # tcllib
23autoproxy::init
24
25# -------------------------------------------------------------------------
26# The driver.
27# - Fetch the page
28# - parse it to extract the text
29# - sort out the html escaped chars
30# - eliminate excessive newlines
31#
32proc webview {url} {
33    set html [fetchurl $url]
34    if {[string length $html] > 0} {
35        variable parsed ""
36        htmlparse::parse -cmd [list parser [namespace current]::parsed] $html
37        set parsed [htmlparse::mapEscapes $parsed]
38        set parsed [regsub -all -line "\n{2,}" $parsed "\n\n"]
39        Display $parsed
40    } else {
41        Error "error: no data available from \"$url\""
42    }
43}
44
45# -------------------------------------------------------------------------
46# This implements our text extracting parser. This will pretty much turn
47# an HTML page into an outline-mode text file.
48#
49proc parser {outvar tag end attr text} {
50    upvar \#0 $outvar out
51    set tag [string tolower $tag]
52    set end [string length $end]
53    if {$end == 0} {
54        if {[string equal "hmstart" $tag]} {
55            set out ""
56        } elseif {[regexp {h(\d+)} $tag -> level]} {
57            append out "\n\n" [string repeat * $level] " " $text
58        } elseif {[lsearch -exact {p pre td} $tag] != -1} {
59            append out "\n" $text
60        } elseif {[lsearch -exact {a span i b} $tag] != -1} {
61            append out $text
62        }
63    }
64}
65
66# -------------------------------------------------------------------------
67# Fetch the target page and cope with HTTP problems. This
68# deals with server errors and proxy authentication failure
69# and handles HTTP redirection.
70#
71proc fetchurl {url} {
72    set html ""
73    set err ""
74    set tok [http::geturl $url -timeout 30000]
75    if {[string equal [http::status $tok] "ok"]} {
76        if {[http::ncode $tok] >= 500} {
77            set err "server error: [http::code $tok]"
78        } elseif {[http::ncode $tok] >= 400} {
79            set err "authentication error: [http::code $tok]"
80        } elseif {[http::ncode $tok] >= 300} {
81            upvar \#0 $tok state
82            array set meta $state(meta)
83            if {[info exists meta(Location)]} {
84                return [fetchurl $meta(Location)]
85            } else {
86                set err [http::code $tok]
87            }
88        } else {
89            set html [http::data $tok]
90        }
91    } else {
92        set err [http::error $tok]
93    }
94    http::cleanup $tok
95
96    if {[string length $err] > 0} {
97        Error $err
98    }
99    return $html
100}
101
102# -------------------------------------------------------------------------
103# Abstract out the display functions so we can run this using either wish or
104# tclsh. This makes life easier on windows where the default is to use wish
105# for tcl files.
106#
107proc Display {msg} {
108    if {[string length [package provide Tk]] > 0} {
109        toplevel .dlg -class Dialog
110        wm title .dlg "webview output."
111        text .dlg.txt -yscrollcommand {.dlg.sb set}
112        scrollbar .dlg.sb -command {.dlg.txt yview}
113        button .dlg.b -command {destroy .dlg} -text Exit -underline 1
114        .dlg.txt insert 0.0 $msg
115        bind .dlg <Control-F2> {console show}
116        bind .dlg <Escape> {.dlg.b invoke}
117        grid .dlg.txt .dlg.sb -sticky news
118        grid .dlg.b  - -sticky e -pady {3 0} -ipadx 4
119        grid rowconfigure .dlg 0 -weight 1
120        grid columnconfigure .dlg 0 -weight 1
121        tkwait window .dlg
122    } else {
123        puts $msg
124    }
125}
126
127proc Error {msg} {
128    if {[string length [package provide Tk]] > 0} {
129        tk_messageBox -title "webviewer error" -icon error -message $msg
130    } else {
131        puts stderr $msg
132    }
133    exit 1
134}
135
136# -------------------------------------------------------------------------
137
138if {!$tcl_interactive} {
139    if {[string length [package provide Tk]] > 0} {
140        wm withdraw .
141        if {$argc < 1} {
142            toplevel .dlg -class Dialog
143            wm title .dlg "Enter URL"
144            label .dlg.l -text "Enter a URL"
145            entry .dlg.e -textvariable argv -width 40
146            button .dlg.ok -text OK -default active -command {destroy .dlg}
147            button .dlg.ca -text Cancel -command {set ::argv ""; destroy .dlg}
148            bind .dlg <Return> {.dlg.ok invoke}
149            bind .dlg <Escape> {.dlg.ca invoke}
150            bind .dlg <Control-F2> {console show}
151            grid .dlg.l - -sticky nws
152            grid .dlg.e - -sticky news
153            grid .dlg.ok .dlg.ca -sticky news
154            tkwait window .dlg
155            if {[llength $argv] < 1} {
156                exit 1
157            }
158        }
159    } else {
160
161        if {$argc != 1} {
162            Error "usage: webviewer URL"
163        }
164
165    }
166    eval [linsert $argv 0 webview]
167}
168
169