1#! /bin/sh 2# the next line restarts with tclsh \ 3exec tclsh "$0" ${1+"$@"} 4 5# webviewer.tcl - Copyright (C) 2004 Pat Thoyts <patthoyts@users.sourceforge.net> 6# 7# This is a sample application to demonstrate the use of the htmlparse package. 8# 9# Given the URL of a web page, this application will display just the text of 10# the page - that is the contents of header, paragraph and pre tags. 11# 12# As an aside, this also illustrates the use of the autoproxy package to 13# cope with http proxy servers (if present) and handles HTTP redirections and 14# so on. 15# 16# Usage: webviewer.tcl http://tip.tcl.tk/2 17# 18# $Id: webviewer.tcl,v 1.2 2009/01/30 04:18:14 andreas_kupries Exp $ 19 20package require htmlparse; # tcllib 21package require http; # tcl 22package require autoproxy; # tcllib 23autoproxy::init 24 25# ------------------------------------------------------------------------- 26# The driver. 27# - Fetch the page 28# - parse it to extract the text 29# - sort out the html escaped chars 30# - eliminate excessive newlines 31# 32proc webview {url} { 33 set html [fetchurl $url] 34 if {[string length $html] > 0} { 35 variable parsed "" 36 htmlparse::parse -cmd [list parser [namespace current]::parsed] $html 37 set parsed [htmlparse::mapEscapes $parsed] 38 set parsed [regsub -all -line "\n{2,}" $parsed "\n\n"] 39 Display $parsed 40 } else { 41 Error "error: no data available from \"$url\"" 42 } 43} 44 45# ------------------------------------------------------------------------- 46# This implements our text extracting parser. This will pretty much turn 47# an HTML page into an outline-mode text file. 48# 49proc parser {outvar tag end attr text} { 50 upvar \#0 $outvar out 51 set tag [string tolower $tag] 52 set end [string length $end] 53 if {$end == 0} { 54 if {[string equal "hmstart" $tag]} { 55 set out "" 56 } elseif {[regexp {h(\d+)} $tag -> level]} { 57 append out "\n\n" [string repeat * $level] " " $text 58 } elseif {[lsearch -exact {p pre td} $tag] != -1} { 59 append out "\n" $text 60 } elseif {[lsearch -exact {a span i b} $tag] != -1} { 61 append out $text 62 } 63 } 64} 65 66# ------------------------------------------------------------------------- 67# Fetch the target page and cope with HTTP problems. This 68# deals with server errors and proxy authentication failure 69# and handles HTTP redirection. 70# 71proc fetchurl {url} { 72 set html "" 73 set err "" 74 set tok [http::geturl $url -timeout 30000] 75 if {[string equal [http::status $tok] "ok"]} { 76 if {[http::ncode $tok] >= 500} { 77 set err "server error: [http::code $tok]" 78 } elseif {[http::ncode $tok] >= 400} { 79 set err "authentication error: [http::code $tok]" 80 } elseif {[http::ncode $tok] >= 300} { 81 upvar \#0 $tok state 82 array set meta $state(meta) 83 if {[info exists meta(Location)]} { 84 return [fetchurl $meta(Location)] 85 } else { 86 set err [http::code $tok] 87 } 88 } else { 89 set html [http::data $tok] 90 } 91 } else { 92 set err [http::error $tok] 93 } 94 http::cleanup $tok 95 96 if {[string length $err] > 0} { 97 Error $err 98 } 99 return $html 100} 101 102# ------------------------------------------------------------------------- 103# Abstract out the display functions so we can run this using either wish or 104# tclsh. This makes life easier on windows where the default is to use wish 105# for tcl files. 106# 107proc Display {msg} { 108 if {[string length [package provide Tk]] > 0} { 109 toplevel .dlg -class Dialog 110 wm title .dlg "webview output." 111 text .dlg.txt -yscrollcommand {.dlg.sb set} 112 scrollbar .dlg.sb -command {.dlg.txt yview} 113 button .dlg.b -command {destroy .dlg} -text Exit -underline 1 114 .dlg.txt insert 0.0 $msg 115 bind .dlg <Control-F2> {console show} 116 bind .dlg <Escape> {.dlg.b invoke} 117 grid .dlg.txt .dlg.sb -sticky news 118 grid .dlg.b - -sticky e -pady {3 0} -ipadx 4 119 grid rowconfigure .dlg 0 -weight 1 120 grid columnconfigure .dlg 0 -weight 1 121 tkwait window .dlg 122 } else { 123 puts $msg 124 } 125} 126 127proc Error {msg} { 128 if {[string length [package provide Tk]] > 0} { 129 tk_messageBox -title "webviewer error" -icon error -message $msg 130 } else { 131 puts stderr $msg 132 } 133 exit 1 134} 135 136# ------------------------------------------------------------------------- 137 138if {!$tcl_interactive} { 139 if {[string length [package provide Tk]] > 0} { 140 wm withdraw . 141 if {$argc < 1} { 142 toplevel .dlg -class Dialog 143 wm title .dlg "Enter URL" 144 label .dlg.l -text "Enter a URL" 145 entry .dlg.e -textvariable argv -width 40 146 button .dlg.ok -text OK -default active -command {destroy .dlg} 147 button .dlg.ca -text Cancel -command {set ::argv ""; destroy .dlg} 148 bind .dlg <Return> {.dlg.ok invoke} 149 bind .dlg <Escape> {.dlg.ca invoke} 150 bind .dlg <Control-F2> {console show} 151 grid .dlg.l - -sticky nws 152 grid .dlg.e - -sticky news 153 grid .dlg.ok .dlg.ca -sticky news 154 tkwait window .dlg 155 if {[llength $argv] < 1} { 156 exit 1 157 } 158 } 159 } else { 160 161 if {$argc != 1} { 162 Error "usage: webviewer URL" 163 } 164 165 } 166 eval [linsert $argv 0 webview] 167} 168 169