#!/usr/bin/python -u
#
# The exercise of rewriting xsltproc on top of the python
# bindings, not complete yet and shows up the things missing
# from the existing python interfaces
#
import sys
import time
import os
import string
import libxml2
# Memory debug specific
libxml2.debugMemory(1)
import libxslt

# Option flags and shared state; main() declares all of these global.
debug = 0
repeat = 0
timing = 0
novalid = 0
noout = 0
docbook = 0
html = 0
xinclude = 0
profile = 0
params = {}
output = None
errorno = 0

#
# timing
#
begin = 0
endtime = 0
def startTimer():
    """Record the current time as the start of a timed section."""
    global begin

    begin = time.time()

def endTimer(msg):
    """Print the time elapsed since the last startTimer() call.

    msg is the label printed in front of the elapsed time, which is
    reported in milliseconds.
    """
    global begin
    global endtime

    endtime = time.time()
    print("%s took %d ms" % (msg, (endtime - begin) * 1000))

def xsltProcess(doc, cur, filename):
    """Apply the stylesheet cur to the parsed document doc.

    doc is the parsed input document; it is freed before returning.
    cur is the stylesheet object used for the transformation.
    filename is the path doc was parsed from; it is used in messages and
    to re-parse the input between the runs requested by the repeat flag.

    The behaviour is driven by the module-level flags (xinclude, timing,
    repeat, html, profile, noout, debug, output).  The result is either
    dumped with debugDumpDocument() or saved to "-" and then freed.
    """
    global timing
    global xinclude
    global params
    global html

    if xinclude:
        if timing:
            startTimer()
        doc.XIncludeProcess()
        if timing:
            endTimer("XInclude processing %s" % (filename))

    if timing:
        startTimer()
    if output is None:
        if repeat != 0:
            # repeat - 1 warm-up runs here, the last run happens below
            for j in range(1, repeat):
                res = cur.applyStylesheet(doc, params)
                # a failed transformation yields no document to free
                if res is not None:
                    res.freeDoc()
                doc.freeDoc()
                if html == 1:
                    doc = libxml2.htmlParseFile(filename, None)
                else:
                    # parseFile() takes the filename only, as in main()
                    doc = libxml2.parseFile(filename)
#        ctxt = libxslt.newTransformContext(doc)
#        if ctxt == None:
#            return
        if profile:
            print("TODO: Profiling not yet supported")
            # no transformation was run, make that explicit for the
            # "no result" check below instead of leaving res unbound
            res = None
        else:
            res = cur.applyStylesheet(doc, params)
        if timing:
            if repeat != 0:
                endTimer("Applying stylesheet %d times" % (repeat))
            else:
                endTimer("Applying stylesheet")
        doc.freeDoc()
        if res is None:
            print("no result for %s" % (filename))
            return
        if noout != 0:
            res.freeDoc()
            return
        if debug == 1:
            res.debugDumpDocument(None)
        else:
            if timing:
                startTimer()
            cur.saveResultToFilename("-", res, 0)
            if timing:
                endTimer("Saving result")
        res.freeDoc()
    else:
        print("TODO: xsltRunStylesheet not yet mapped")
        # nothing consumed the document on this path, release it here
        doc.freeDoc()

def usage(name = 'pyxsltproc'):
    """Print the command-line help, using name as the program name."""
    print("Usage: %s [options] stylesheet file [file ...]" % (name))
    print("a reimplementation of xsltproc(1) on top of libxslt-python")
    print(" Options:")
    print("\t--version or -V: show the version of libxml and libxslt used")
    print("\t--verbose or -v: show logs of what's happening")
    print("\t--output file or -o file: save to a given file")
    print("\t--timing: display the time used")
    print("\t--repeat: run the transformation 20 times")
    print("\t--debug: dump the tree of the result instead")
    print("\t--novalid skip the Dtd loading phase")
    print("\t--noout: do not dump the result")
    print("\t--maxdepth val : increase the maximum depth")
    print("\t--html: the input document is(are) an HTML file(s)")
    print("\t--param name value : pass a (parameter,value) pair")
    print("\t value is an XPath expression.")
    print("\t string values must be quoted like \"'string'\"")
    print("\t or use stringparam to avoid it")
    print("\t--stringparam name value : pass a (parameter,string value) pair")
    print("\t--nonet refuse to fetch DTDs or entities over network")
    print("\t--catalogs : use SGML catalogs from $SGML_CATALOG_FILES")
    print("\t otherwise XML Catalogs starting from ")
    print("\t file:///etc/xml/catalog are activated by default")
    print("\t--xinclude : do XInclude processing on document input")
    print("\t--profile or --norman : dump profiling informations ")
    print("\nProject libxslt home page: http://xmlsoft.org/XSLT/")
    print("To report bugs and get help: http://xmlsoft.org/XSLT/bugs.html")

def main(args = None):
    """Parse the options, load the stylesheet and transform each document.

    args is the list of command-line arguments without the program name;
    when it is None or empty, sys.argv[1:] is used instead.

    The first pass over args only sets the option globals.  The second
    pass finds the first non-option argument and loads it as the
    stylesheet, and every following argument is parsed and handed to
    xsltProcess().

    Returns 1 when there is nothing on the command line, 3 when an option
    is unknown or lacks its value, and None otherwise.  Loading failures
    are reported through the global errorno: 4 when the stylesheet file
    cannot be parsed, 5 when it cannot be turned into a stylesheet, 6 when
    an input document cannot be parsed.
    """
    global debug
    global repeat
    global timing
    global novalid
    global noout
    global docbook
    global html
    global xinclude
    global profile
    global params
    global output
    global errorno

    done = 0
    cur = None

    if not args:
        args = sys.argv[1:]
        if len(args) <= 0:
            usage(sys.argv[0])
            # nothing to do without a stylesheet
            return(1)


    # First pass: options only, positional arguments are skipped
    i = 0
    while i < len(args):
        opt = args[i]
        if opt == "-":
            break
        # startswith() also copes with an empty argument
        if not opt.startswith('-'):
            i = i + 1
            continue
        if opt in ("-timing", "--timing"):
            timing = 1
        elif opt in ("-debug", "--debug"):
            debug = 1
        elif opt in ("-verbose", "--verbose", "-v"):
            print("TODO: xsltSetGenericDebugFunc() mapping missing")
        elif opt in ("-version", "--version", "-V"):
            print("TODO: version informations mapping missing")
        elif opt in ("-repeat", "--repeat"):
            # given once: 20 runs, given again: 100 runs
            if repeat == 0:
                repeat = 20
            else:
                repeat = 100
        elif opt in ("-o", "-output", "--output"):
            if i + 1 >= len(args):
                print("Option %s requires a file name" % (opt))
                usage()
                return(3)
            output = args[i + 1]
            i = i + 1
        elif opt in ("-novalid", "--novalid"):
            print("TODO: xmlLoadExtDtdDefaultValue mapping missing")
            novalid = 1
        elif opt in ("-noout", "--noout"):
            noout = 1
        elif opt in ("-html", "--html"):
            html = 1
        elif opt in ("-nonet", "--nonet"):
            print("TODO: xmlSetExternalEntityLoader mapping missing")
        elif opt in ("-catalogs", "--catalogs"):
            catalogs = os.environ.get('SGML_CATALOG_FILES')
            if catalogs is not None:
                libxml2.loadCatalogs(catalogs)
            else:
                print("Variable $SGML_CATALOG_FILES not set")
        elif opt in ("-xinclude", "--xinclude"):
            xinclude = 1
            libxslt.setXIncludeDefault(1)
        elif opt in ("-param", "--param"):
            if i + 2 >= len(args):
                print("Option %s requires a name and a value" % (opt))
                usage()
                return(3)
            params[args[i + 1]] = args[i + 2]
            i = i + 2
        elif opt in ("-stringparam", "--stringparam"):
            if i + 2 >= len(args):
                print("Option %s requires a name and a value" % (opt))
                usage()
                return(3)
            # quote the value so that it is a string, not an XPath expression
            params[args[i + 1]] = "'%s'" % (args[i + 2])
            i = i + 2
        elif opt in ("-maxdepth", "--maxdepth"):
            print("TODO: xsltMaxDepth mapping missing")
            # skip the depth value, as the stylesheet lookup below does
            i = i + 1
        else:
            print("Unknown option %s" % (opt))
            usage()
            return(3)

        i = i + 1

    libxml2.lineNumbersDefault(1)
    libxml2.substituteEntitiesDefault(1)
    # TODO: xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS
    # if novalid:
    # TODO: xmlLoadExtDtdDefaultValue = 0

    # TODO libxslt.exsltRegisterAll();
    libxslt.registerTestModule()

    # Second pass: the first non-option argument is the stylesheet
    i = 0
    while i < len(args) and done == 0:
        opt = args[i]
        # options taking one value
        if opt in ("-maxdepth", "--maxdepth"):
            i = i + 2
            continue
        if opt in ("-o", "-output", "--output"):
            i = i + 2
            continue
        # options taking a name and a value
        if opt in ("-param", "--param"):
            i = i + 3
            continue
        if opt in ("-stringparam", "--stringparam"):
            i = i + 3
            continue
        if opt != "-" and opt.startswith('-'):
            i = i + 1
            continue
        if timing:
            startTimer()
        style = libxml2.parseFile(opt)
        if timing:
            endTimer("Parsing stylesheet %s" % (opt))
        if style is None:
            print("cannot parse %s" % (opt))
            cur = None
            errorno = 4
            done = 1
        else:
            cur = libxslt.loadStylesheetPI(style)
            if cur is not None:
                # the document names its own stylesheet: transform it
                # right away, then release that stylesheet
                xsltProcess(style, cur, opt)
                cur.freeStylesheet()
                cur = None
            else:
                cur = libxslt.parseStylesheetDoc(style)
                if cur is None:
                    style.freeDoc()
                    errorno = 5
                    done = 1
        i = i + 1
        break

    # Everything after the stylesheet is an input document
    while i < len(args) and done == 0 and cur is not None:
        if timing:
            startTimer()
        if html:
            doc = libxml2.htmlParseFile(args[i], None)
        else:
            doc = libxml2.parseFile(args[i])
        if doc is None:
            print("unable to parse %s" % (args[i]))
            errorno = 6
            i = i + 1
            continue
        if timing:
            endTimer("Parsing document %s" % (args[i]))
        xsltProcess(doc, cur, args[i])
        i = i + 1

    if cur is not None:
        cur.freeStylesheet()
    params = None

if __name__ == "__main__":
    main()

# Memory debug specific
libxslt.cleanup()
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()

sys.exit(errorno)