1# Removed provision of the backward compatible name. Moved to separate
2# file/package.
3package provide vfs::zip 1.0.3
4
5package require vfs
6
7# Using the vfs, memchan and Trf extensions, we ought to be able
8# to write a Tcl-only zip virtual filesystem.  What we have below
9# is basically that.
10
11namespace eval vfs::zip {}
12
13# Used to execute a zip archive.  This is rather like a jar file
14# but simpler.  We simply mount it and then source a toplevel
15# file called 'main.tcl'.
16proc vfs::zip::Execute {zipfile} {
17    Mount $zipfile $zipfile
18    source [file join $zipfile main.tcl]
19}
20
21proc vfs::zip::Mount {zipfile local} {
22    set fd [::zip::open [::file normalize $zipfile]]
23    vfs::filesystem mount $local [list ::vfs::zip::handler $fd]
24    # Register command to unmount
25    vfs::RegisterMount $local [list ::vfs::zip::Unmount $fd]
26    return $fd
27}
28
29proc vfs::zip::Unmount {fd local} {
30    vfs::filesystem unmount $local
31    ::zip::_close $fd
32}
33
34proc vfs::zip::handler {zipfd cmd root relative actualpath args} {
35    #::vfs::log [list $zipfd $cmd $root $relative $actualpath $args]
36    if {$cmd == "matchindirectory"} {
37	eval [list $cmd $zipfd $relative $actualpath] $args
38    } else {
39	eval [list $cmd $zipfd $relative] $args
40    }
41}
42
43proc vfs::zip::attributes {zipfd} { return [list "state"] }
44proc vfs::zip::state {zipfd args} {
45    vfs::attributeCantConfigure "state" "readonly" $args
46}
47
48# If we implement the commands below, we will have a perfect
49# virtual file system for zip files.
50
51proc vfs::zip::matchindirectory {zipfd path actualpath pattern type} {
52    #::vfs::log [list matchindirectory $path $actualpath $pattern $type]
53
54    # This call to zip::getdir handles empty patterns properly as asking
55    # for the existence of a single file $path only
56    set res [::zip::getdir $zipfd $path $pattern]
57    #::vfs::log "got $res"
58    if {![string length $pattern]} {
59	if {![::zip::exists $zipfd $path]} { return {} }
60	set res [list $actualpath]
61	set actualpath ""
62    }
63
64    set newres [list]
65    foreach p [::vfs::matchCorrectTypes $type $res $actualpath] {
66	lappend newres [file join $actualpath $p]
67    }
68    #::vfs::log "got $newres"
69    return $newres
70}
71
72proc vfs::zip::stat {zipfd name} {
73    #::vfs::log "stat $name"
74    ::zip::stat $zipfd $name sb
75    #::vfs::log [array get sb]
76    array get sb
77}
78
79proc vfs::zip::access {zipfd name mode} {
80    #::vfs::log "zip-access $name $mode"
81    if {$mode & 2} {
82	vfs::filesystem posixerror $::vfs::posix(EROFS)
83    }
84    # Readable, Exists and Executable are treated as 'exists'
85    # Could we get more information from the archive?
86    if {[::zip::exists $zipfd $name]} {
87	return 1
88    } else {
89	error "No such file"
90    }
91
92}
93
94proc vfs::zip::open {zipfd name mode permissions} {
95    #::vfs::log "open $name $mode $permissions"
96    # return a list of two elements:
97    # 1. first element is the Tcl channel name which has been opened
98    # 2. second element (optional) is a command to evaluate when
99    #    the channel is closed.
100
101    switch -- $mode {
102	"" -
103	"r" {
104	    if {![::zip::exists $zipfd $name]} {
105		vfs::filesystem posixerror $::vfs::posix(ENOENT)
106	    }
107
108	    ::zip::stat $zipfd $name sb
109
110	    set nfd [vfs::memchan]
111	    fconfigure $nfd -translation binary
112
113	    seek $zipfd $sb(ino) start
114	    set data [zip::Data $zipfd sb 0]
115
116	    puts -nonewline $nfd $data
117
118	    fconfigure $nfd -translation auto
119	    seek $nfd 0
120	    return [list $nfd]
121	}
122	default {
123	    vfs::filesystem posixerror $::vfs::posix(EROFS)
124	}
125    }
126}
127
128proc vfs::zip::createdirectory {zipfd name} {
129    #::vfs::log "createdirectory $name"
130    vfs::filesystem posixerror $::vfs::posix(EROFS)
131}
132
133proc vfs::zip::removedirectory {zipfd name recursive} {
134    #::vfs::log "removedirectory $name"
135    vfs::filesystem posixerror $::vfs::posix(EROFS)
136}
137
138proc vfs::zip::deletefile {zipfd name} {
139    #::vfs::log "deletefile $name"
140    vfs::filesystem posixerror $::vfs::posix(EROFS)
141}
142
143proc vfs::zip::fileattributes {zipfd name args} {
144    #::vfs::log "fileattributes $args"
145    switch -- [llength $args] {
146	0 {
147	    # list strings
148	    return [list]
149	}
150	1 {
151	    # get value
152	    set index [lindex $args 0]
153	    return ""
154	}
155	2 {
156	    # set value
157	    set index [lindex $args 0]
158	    set val [lindex $args 1]
159	    vfs::filesystem posixerror $::vfs::posix(EROFS)
160	}
161    }
162}
163
164proc vfs::zip::utime {fd path actime mtime} {
165    vfs::filesystem posixerror $::vfs::posix(EROFS)
166}
167
168# Below copied from TclKit distribution
169
170#
171# ZIP decoder:
172#
173# See the ZIP file format specification:
174#   http://www.pkware.com/documents/casestudies/APPNOTE.TXT
175#
176# Format of zip file:
177# [ Data ]* [ TOC ]* EndOfArchive
178#
179# Note: TOC is refered to in ZIP doc as "Central Archive"
180#
181# This means there are two ways of accessing:
182#
183# 1) from the begining as a stream - until the header
184#	is not "PK\03\04" - ideal for unzipping.
185#
186# 2) for table of contents without reading entire
187#	archive by first fetching EndOfArchive, then
188#	just loading the TOC
189#
190
191namespace eval zip {
192    array set methods {
193	0	{stored - The file is stored (no compression)}
194	1	{shrunk - The file is Shrunk}
195	2	{reduce1 - The file is Reduced with compression factor 1}
196	3	{reduce2 - The file is Reduced with compression factor 2}
197	4	{reduce3 - The file is Reduced with compression factor 3}
198	5	{reduce4 - The file is Reduced with compression factor 4}
199	6	{implode - The file is Imploded}
200	7	{reserved - Reserved for Tokenizing compression algorithm}
201	8	{deflate - The file is Deflated}
202	9	{reserved - Reserved for enhanced Deflating}
203	10	{pkimplode - PKWARE Date Compression Library Imploding}
204        11	{reserved - Reserved by PKWARE}
205        12	{bzip2 - The file is compressed using BZIP2 algorithm}
206        13	{reserved - Reserved by PKWARE}
207        14	{lzma - LZMA (EFS)}
208        15	{reserved - Reserved by PKWARE}
209    }
210    # Version types (high-order byte)
211    array set systems {
212	0	{dos}
213	1	{amiga}
214	2	{vms}
215	3	{unix}
216	4	{vm cms}
217	5	{atari}
218	6	{os/2}
219	7	{macos}
220	8	{z system 8}
221	9	{cp/m}
222	10	{tops20}
223	11	{windows}
224	12	{qdos}
225	13	{riscos}
226	14	{vfat}
227	15	{mvs}
228	16	{beos}
229	17	{tandem}
230	18	{theos}
231    }
232    # DOS File Attrs
233    array set dosattrs {
234	1	{readonly}
235	2	{hidden}
236	4	{system}
237	8	{unknown8}
238	16	{directory}
239	32	{archive}
240	64	{unknown64}
241	128	{normal}
242    }
243
244    proc u_short {n}  { return [expr { ($n+0x10000)%0x10000 }] }
245}
246
247proc zip::DosTime {date time} {
248    set time [u_short $time]
249    set date [u_short $date]
250
251    # time = fedcba9876543210
252    #        HHHHHmmmmmmSSSSS (sec/2 actually)
253
254    # data = fedcba9876543210
255    #        yyyyyyyMMMMddddd
256
257    set sec  [expr { ($time & 0x1F) * 2 }]
258    set min  [expr { ($time >> 5) & 0x3F }]
259    set hour [expr { ($time >> 11) & 0x1F }]
260
261    set mday [expr { $date & 0x1F }]
262    set mon  [expr { (($date >> 5) & 0xF) }]
263    set year [expr { (($date >> 9) & 0xFF) + 1980 }]
264
265    # Fix up bad date/time data, no need to fail
266    while {$sec  > 59} {incr sec  -60}
267    while {$min  > 59} {incr sec  -60}
268    while {$hour > 23} {incr hour -24}
269    if {$mday < 1}  {incr mday}
270    if {$mon  < 1}  {incr mon}
271    while {$mon > 12} {incr hour -12}
272
273    while {[catch {
274	set dt [format {%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d} \
275		    $year $mon $mday $hour $min $sec]
276	set res [clock scan $dt -gmt 1]
277    }]} {
278	# Only mday can be wrong, at end of month
279	incr mday -1
280    }
281    return $res
282}
283
284
285proc zip::Data {fd arr verify} {
286    upvar 1 $arr sb
287
288    # APPNOTE A: Local file header
289    set buf [read $fd 30]
290    set n [binary scan $buf A4sssssiiiss \
291               hdr sb(ver) sb(flags) sb(method) time date \
292               crc csize size namelen xtralen]
293
294    if { ![string equal "PK\03\04" $hdr] } {
295	binary scan $hdr H* x
296	return -code error "bad header: $x"
297    }
298    set sb(ver)	   [expr {$sb(ver) & 0xffff}]
299    set sb(flags)  [expr {$sb(flags) & 0xffff}]
300    set sb(method) [expr {$sb(method) & 0xffff}]
301    set sb(mtime)  [DosTime $date $time]
302    if {!($sb(flags) & (1<<3))} {
303        set sb(crc)    [expr {$crc & 0xffffffff}]
304        set sb(csize)  [expr {$csize & 0xffffffff}]
305        set sb(size)   [expr {$size & 0xffffffff}]
306    }
307
308    set sb(name)   [read $fd [expr {$namelen & 0xffff}]]
309    set sb(extra)  [read $fd [expr {$xtralen & 0xffff}]]
310    if {$sb(flags) & (1 << 10)} {
311        set sb(name) [encoding convertfrom utf-8 $sb(name)]
312    }
313    set sb(name) [string trimleft $sb(name) "./"]
314
315    # APPNOTE B: File data
316    #   if bit 3 of flags is set the csize comes from the central directory
317    set data [read $fd $sb(csize)]
318
319    # APPNOTE C: Data descriptor
320    if { $sb(flags) & (1<<3) } {
321        binary scan [read $fd 4] i ddhdr
322        if {($ddhdr & 0xffffffff) == 0x08074b50} {
323            binary scan [read $fd 12] iii sb(crc) sb(csize) sb(size)
324        } else {
325            set sb(crc) $ddhdr
326            binary scan [read $fd 8] ii sb(csize) sb(size)
327        }
328        set sb(crc) [expr {$sb(crc) & 0xffffffff}]
329        set sb(csize) [expr {$sb(csize) & 0xffffffff}]
330        set sb(size) [expr {$sb(size) & 0xffffffff}]
331    }
332
333    switch -exact -- $sb(method) {
334        0 {
335            # stored; no compression
336        }
337        8 {
338            # deflated
339            if {[catch {
340                set data [vfs::zip -mode decompress -nowrap 1 $data]
341            } err]} then {
342                return -code error "error inflating \"$sb(name)\": $err"
343            }
344        }
345        default {
346            set method $sb(method)
347            if {[info exists methods($method)]} {
348                set method $methods($method)
349            }
350            return -code error "unsupported compression method
351                \"$method\" used for \"$sb(name)\""
352        }
353    }
354
355    if { $verify && $sb(method) != 0} {
356	set ncrc [vfs::crc $data]
357	if { ($ncrc & 0xffffffff) != $sb(crc) } {
358	    vfs::log [format {%s: crc mismatch: expected 0x%x, got 0x%x} \
359                          $sb(name) $sb(crc) $ncrc]
360	}
361    }
362    return $data
363}
364
365proc zip::EndOfArchive {fd arr} {
366    upvar 1 $arr cb
367
368    # [SF Tclvfs Bug 1003574]. Do not seek over beginning of file.
369    seek $fd 0 end
370
371    # Just looking in the last 512 bytes may be enough to handle zip
372    # archives without comments, however for archives which have
373    # comments the chunk may start at an arbitrary distance from the
374    # end of the file. So if we do not find the header immediately
375    # we have to extend the range of our search, possibly until we
376    # have a large part of the archive in memory. We can fail only
377    # after the whole file has been searched.
378
379    set sz  [tell $fd]
380    set len 512
381    set at  512
382    while {1} {
383	if {$sz < $at} {set n -$sz} else {set n -$at}
384
385	seek $fd $n end
386	set hdr [read $fd $len]
387
388	# We are using 'string last' as we are searching the first
389	# from the end, which is the last from the beginning. See [SF
390	# Bug 2256740]. A zip archive stored in a zip archive can
391	# confuse the unmodified code, triggering on the magic
392	# sequence for the inner, uncompressed archive.
393	set pos [string last "PK\05\06" $hdr]
394	if {$pos == -1} {
395	    if {$at >= $sz} {
396		return -code error "no header found"
397	    }
398	    set len 540 ; # after 1st iteration we force overlap with last buffer
399	    incr at 512 ; # to ensure that the pattern we look for is not split at
400	    #           ; # a buffer boundary, nor the header itself
401	} else {
402	    break
403	}
404    }
405
406    set hdr [string range $hdr [expr {$pos + 4}] [expr {$pos + 21}]]
407    set pos [expr {[tell $fd] + $pos - 512}]
408
409    binary scan $hdr ssssiis \
410	cb(ndisk) cb(cdisk) \
411	cb(nitems) cb(ntotal) \
412	cb(csize) cb(coff) \
413	cb(comment)
414
415    set cb(ndisk)	[u_short $cb(ndisk)]
416    set cb(nitems)	[u_short $cb(nitems)]
417    set cb(ntotal)	[u_short $cb(ntotal)]
418    set cb(comment)	[u_short $cb(comment)]
419
420    # Compute base for situations where ZIP file
421    # has been appended to another media (e.g. EXE)
422    set cb(base)	[expr { $pos - $cb(csize) - $cb(coff) }]
423}
424
425proc zip::TOC {fd arr} {
426    upvar 1 $arr sb
427
428    set buf [read $fd 46]
429
430    binary scan $buf A4ssssssiiisssssii hdr \
431      sb(vem) sb(ver) sb(flags) sb(method) time date \
432      sb(crc) sb(csize) sb(size) \
433      flen elen clen sb(disk) sb(attr) \
434      sb(atx) sb(ino)
435
436    if { ![string equal "PK\01\02" $hdr] } {
437	binary scan $hdr H* x
438	return -code error "bad central header: $x"
439    }
440
441    foreach v {vem ver flags method disk attr} {
442	set sb($v) [expr {$sb($v) & 0xffff}]
443    }
444    set sb(crc) [expr {$sb(crc) & 0xffffffff}]
445    set sb(csize) [expr {$sb(csize) & 0xffffffff}]
446    set sb(size) [expr {$sb(size) & 0xffffffff}]
447    set sb(mtime) [DosTime $date $time]
448    set sb(mode) [expr { ($sb(atx) >> 16) & 0xffff }]
449    if { ( $sb(atx) & 0xff ) & 16 } {
450	set sb(type) directory
451    } else {
452	set sb(type) file
453    }
454    set sb(name) [read $fd [u_short $flen]]
455    set sb(extra) [read $fd [u_short $elen]]
456    set sb(comment) [read $fd [u_short $clen]]
457    if {$sb(flags) & (1 << 10)} {
458        set sb(name) [encoding convertfrom utf-8 $sb(name)]
459        set sb(comment) [encoding convertfrom utf-8 $sb(comment)]
460    }
461    set sb(name) [string trimleft $sb(name) "./"]
462}
463
464proc zip::open {path} {
465    #vfs::log [list open $path]
466    set fd [::open $path]
467
468    if {[catch {
469	upvar #0 zip::$fd cb
470	upvar #0 zip::$fd.toc toc
471
472	fconfigure $fd -translation binary ;#-buffering none
473
474	zip::EndOfArchive $fd cb
475
476	seek $fd $cb(coff) start
477
478	set toc(_) 0; unset toc(_); #MakeArray
479
480	for {set i 0} {$i < $cb(nitems)} {incr i} {
481	    zip::TOC $fd sb
482
483	    set sb(depth) [llength [file split $sb(name)]]
484
485	    set name [string tolower $sb(name)]
486	    set toc($name) [array get sb]
487	    FAKEDIR toc [file dirname $name]
488	}
489    } err]} {
490	close $fd
491	return -code error $err
492    }
493
494    return $fd
495}
496
497proc zip::FAKEDIR {arr path} {
498    upvar 1 $arr toc
499
500    if { $path == "."} { return }
501
502
503    if { ![info exists toc($path)] } {
504	# Implicit directory
505	lappend toc($path) \
506		name $path \
507		type directory mtime 0 size 0 mode 0777 \
508		ino -1 depth [llength [file split $path]]
509    }
510    FAKEDIR toc [file dirname $path]
511}
512
513proc zip::exists {fd path} {
514    #::vfs::log "$fd $path"
515    if {$path == ""} {
516	return 1
517    } else {
518	upvar #0 zip::$fd.toc toc
519	info exists toc([string tolower $path])
520    }
521}
522
523proc zip::stat {fd path arr} {
524    upvar #0 zip::$fd.toc toc
525    upvar 1 $arr sb
526    #vfs::log [list stat $fd $path $arr [info level -1]]
527
528    set name [string tolower $path]
529    if { $name == "" || $name == "." } {
530	array set sb {
531	    type directory mtime 0 size 0 mode 0777
532	    ino -1 depth 0 name ""
533	}
534    } elseif {![info exists toc($name)] } {
535	return -code error "could not read \"$path\": no such file or directory"
536    } else {
537	array set sb $toc($name)
538    }
539    set sb(dev) -1
540    set sb(uid)	-1
541    set sb(gid)	-1
542    set sb(nlink) 1
543    set sb(atime) $sb(mtime)
544    set sb(ctime) $sb(mtime)
545    return ""
546}
547
548# Treats empty pattern as asking for a particular file only
549proc zip::getdir {fd path {pat *}} {
550    #::vfs::log [list getdir $fd $path $pat]
551    upvar #0 zip::$fd.toc toc
552
553    if { $path == "." || $path == "" } {
554	set path [set tmp [string tolower $pat]]
555    } else {
556        set globmap [list "\[" "\\\[" "*" "\\*" "?" "\\?"]
557	set tmp [string tolower $path]
558        set path [string map $globmap $tmp]
559	if {$pat != ""} {
560	    append tmp /[string tolower $pat]
561	    append path /[string tolower $pat]
562	}
563    }
564    # file split can be confused by the glob quoting so split tmp string
565    set depth [llength [file split $tmp]]
566
567    #vfs::log "getdir $fd $path $depth $pat [array names toc $path]"
568    if {$depth} {
569	set ret {}
570	foreach key [array names toc $path] {
571	    if {[string index $key end] == "/"} {
572		# Directories are listed twice: both with and without
573		# the trailing '/', so we ignore the one with
574		continue
575	    }
576	    array set sb $toc($key)
577
578	    if { $sb(depth) == $depth } {
579		if {[info exists toc(${key}/)]} {
580		    array set sb $toc(${key}/)
581		}
582		lappend ret [file tail $sb(name)]
583	    } else {
584		#::vfs::log "$sb(depth) vs $depth for $sb(name)"
585	    }
586	    unset sb
587	}
588	return $ret
589    } else {
590	# just the 'root' of the zip archive.  This obviously exists and
591	# is a directory.
592	return [list {}]
593    }
594}
595
596proc zip::_close {fd} {
597    variable $fd
598    variable $fd.toc
599    unset $fd
600    unset $fd.toc
601    ::close $fd
602}
603