1# sgml-8.0.tcl -- 2# 3# This file provides generic parsing services for SGML-based 4# languages, namely HTML and XML. 5# This file supports Tcl 8.0 characters and regular expressions. 6# 7# NB. It is a misnomer. There is no support for parsing 8# arbitrary SGML as such. 9# 10# Copyright (c) 1998,1999 Zveno Pty Ltd 11# http://www.zveno.com/ 12# 13# Zveno makes this software available free of charge for any purpose. 14# Copies may be made of this software but all of this notice must be included 15# on any copy. 16# 17# The software was developed for research purposes only and Zveno does not 18# warrant that it is error free or fit for any purpose. Zveno disclaims any 19# liability for all claims, expenses, losses, damages and costs any user may 20# incur as a result of using, copying or modifying this software. 21# 22# Copyright (c) 1997 ANU and CSIRO on behalf of the 23# participants in the CRC for Advanced Computational Systems ('ACSys'). 24# 25# ACSys makes this software and all associated data and documentation 26# ('Software') available free of charge for any purpose. You may make copies 27# of the Software but you must include all of this notice on any copy. 28# 29# The Software was developed for research purposes and ACSys does not warrant 30# that it is error free or fit for any purpose. ACSys disclaims any 31# liability for all claims, expenses, losses, damages and costs any user may 32# incur as a result of using, copying or modifying the Software. 33# 34# $Id: sgml-8.0.tcl,v 1.3 2002/08/30 07:52:16 balls Exp $ 35 36package require -exact Tcl 8.0 37 38package provide sgml 1.9 39 40namespace eval sgml { 41 42 # Convenience routine 43 proc cl x { 44 return "\[$x\]" 45 } 46 47 # Define various regular expressions 48 49 # Character classes 50 variable Char \t\n\r\ -\xFF 51 variable BaseChar A-Za-z 52 variable Letter $BaseChar 53 variable Digit 0-9 54 variable CombiningChar {} 55 variable Extender {} 56 variable Ideographic {} 57 58 # white space 59 variable Wsp " \t\r\n" 60 variable noWsp [cl ^$Wsp] 61 62 # Various XML names 63 variable NameChar \[-$Letter$Digit._:$CombiningChar$Extender\] 64 variable Name \[_:$BaseChar$Ideographic\]$NameChar* 65 variable Names ${Name}(?:$Wsp$Name)* 66 variable Nmtoken $NameChar+ 67 variable Nmtokens ${Nmtoken}(?:$Wsp$Nmtoken)* 68 69 # table of predefined entities for XML 70 71 variable EntityPredef 72 array set EntityPredef { 73 lt < gt > amp & quot \" apos ' 74 } 75 76} 77 78# These regular expressions are defined here once for better performance 79 80namespace eval sgml { 81 variable Wsp 82 83 # Watch out for case-sensitivity 84 85 set attlist_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#REQUIRED|#IMPLIED) 86 set attlist_enum_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*\\(([cl ^)]*)\\)[cl $Wsp]*("([cl ^")])")? ;# " 87 set attlist_fixed_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(#FIXED)[cl $Wsp]*([cl ^$Wsp]+) 88 89 set param_entity_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*([cl ^"$Wsp]*)[cl $Wsp]*"([cl ^"]*)" 90 91 set notation_exp [cl $Wsp]*([cl ^$Wsp]+)[cl $Wsp]*(.*) 92 93} 94 95### Utility procedures 96 97# sgml::noop -- 98# 99# A do-nothing proc 100# 101# Arguments: 102# args arguments 103# 104# Results: 105# Nothing. 106 107proc sgml::noop args { 108 return 0 109} 110 111# sgml::identity -- 112# 113# Identity function. 114# 115# Arguments: 116# a arbitrary argument 117# 118# Results: 119# $a 120 121proc sgml::identity a { 122 return $a 123} 124 125# sgml::Error -- 126# 127# Throw an error 128# 129# Arguments: 130# args arguments 131# 132# Results: 133# Error return condition. 134 135proc sgml::Error args { 136 uplevel return -code error [list $args] 137} 138 139### Following procedures are based on html_library 140 141# sgml::zapWhite -- 142# 143# Convert multiple white space into a single space. 144# 145# Arguments: 146# data plain text 147# 148# Results: 149# As above 150 151proc sgml::zapWhite data { 152 regsub -all "\[ \t\r\n\]+" $data { } data 153 return $data 154} 155 156proc sgml::Boolean value { 157 regsub {1|true|yes|on} $value 1 value 158 regsub {0|false|no|off} $value 0 value 159 return $value 160} 161 162