1# cfront.tcl --
2#
3#	Generator frontend for compiler of magic(5) files into recognizers
4#	based on the 'rtcore'. Parses magic(5) into a basic 'script'.
5#
6# Copyright (c) 2004-2005 Colin McCormack <coldstore@users.sourceforge.net>
7# Copyright (c) 2005      Andreas Kupries <andreas_kupries@users.sourceforge.net>
8#
9# See the file "license.terms" for information on usage and redistribution
10# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11#
12# RCS: @(#) $Id: cfront.tcl,v 1.7 2008/03/22 01:10:32 andreas_kupries Exp $
13
14#####
15#
16# "mime type recognition in pure tcl"
17# http://wiki.tcl.tk/12526
18#
19# Tcl code harvested on:  10 Feb 2005, 04:06 GMT
20# Wiki page last updated: ???
21#
22#####
23
24# ### ### ### ######### ######### #########
25## Requirements
26
27package require Tcl 8.4
28
29# file to compile the magic file from magic(5) into a tcl program
30package require fileutil              ; # File processing (input)
31package require fileutil::magic::cgen ; # Code generator.
32package require fileutil::magic::rt   ; # Runtime (typemap)
33package require struct::list          ; # lrepeat.
34
35package provide fileutil::magic::cfront 1.0
36
37# ### ### ### ######### ######### #########
38## Implementation
39
40namespace eval ::fileutil::magic::cfront {
41    # Configuration flag. (De)activate debugging output.
42    # This is done during initialization.
43    # Changes at runtime have no effect.
44
45    variable debug 0
46
47    # Constants
48
49    variable hashprotection  [list "\#" "\\#" \" \\\" \{ \\\{ \} \\\}]      ;#"
50    variable hashprotectionB [list "\#" "\\\#" \" \\\" \} \\\} ( \\( ) \\)] ;#"
51
52    # Make backend functionality accessible
53    namespace import ::fileutil::magic::cgen::*
54
55    namespace export compile procdef install
56}
57
58# parse an individual line
59proc ::fileutil::magic::cfront::parseline {line {maxlevel 10000}} {
60    # calculate the line's level
61    set unlevel [string trimleft $line >]
62    set level   [expr {[string length $line] - [string length $unlevel]}]
63    if {$level > $maxlevel} {
64   	return -code continue "Skip - too high a level"
65    }
66
67    # regexp parse line into (offset, type, value, command)
68    set parse [regexp -expanded -inline {^(\S+)\s+(\S+)\s*((\S|(\B\s))*)\s*(.*)$} $unlevel]
69    if {$parse == {}} {
70   	error "Can't parse: '$unlevel'"
71    }
72
73    # unpack parsed line
74    set value   ""
75    set command ""
76    foreach {junk offset type value junk1 junk2 command} $parse break
77
78    # handle trailing spaces
79    if {[string index $value end] eq "\\"} {
80   	append value " "
81    }
82    if {[string index $command end] eq "\\"} {
83   	append command " "
84    }
85
86    if {$value eq ""} {
87	# badly formatted line
88   	return -code error "no value"
89    }
90
91    ::fileutil::magic::cfront::Debug {
92   	puts "level:$level offset:$offset type:$type value:'$value' command:'$command'"
93    }
94
95    # return the line's fields
96    return [list $level $offset $type $value $command]
97}
98
99# process a magic file
100proc ::fileutil::magic::cfront::process {file {maxlevel 10000}} {
101    variable hashprotection
102    variable hashprotectionB
103    variable level	;# level of line
104    variable linenum	;# line number
105
106    set level  0
107    set script {}
108
109    set linenum 0
110    ::fileutil::foreachLine line $file {
111   	incr linenum
112   	set line [string trim $line " "]
113   	if {[string index $line 0] eq "#"} {
114   	    continue	;# skip comments
115   	} elseif {$line == ""} {
116   	    continue	;# skip blank lines
117   	} else {
118   	    # parse line
119   	    if {[catch {parseline $line $maxlevel} parsed]} {
120   		continue	;# skip erroring lines
121   	    }
122
123   	    # got a valid line
124   	    foreach {level offset type value message} $parsed break
125
126   	    # strip comparator out of value field,
127   	    # (they are combined)
128   	    set compare [string index $value 0]
129   	    switch -glob --  $value {
130   		[<>]=* {
131   		    set compare [string range $value 0 1]
132   		    set value   [string range $value 2 end]
133   		}
134
135   		<* - >* - &* - ^* {
136   		    set value [string range $value 1 end]
137   		}
138
139   		=* {
140   		    set compare "=="
141   		    set value   [string range $value 1 end]
142   		}
143
144   		!* {
145   		    set compare "!="
146   		    set value   [string range $value 1 end]
147   		}
148
149   		x {
150   		    # this is the 'don't care' match
151   		    # used for collecting values
152   		    set value ""
153   		}
154
155   		default {
156   		    # the default comparator is equals
157   		    set compare "=="
158   		    if {[string match {\\[<!>=]*} $value]} {
159   			set value [string range $value 1 end]
160   		    }
161   		}
162   	    }
163
164   	    # process type field
165   	    set qual ""
166   	    switch -glob -- $type {
167   		pstring* - string* {
168   		    # String or Pascal string type
169
170   		    # extract string match qualifiers
171		    foreach {type qual} [split $type /] break
172
173   		    # convert pstring to string + qualifier
174   		    if {$type eq "pstring"} {
175   			append qual "p"
176   			set type "string"
177   		    }
178
179   		    # protect hashes in output script value
180   		    set value [string map $hashprotection $value]
181
182   		    if {($value eq "\\0") && ($compare eq ">")} {
183   			# record 'any string' match
184   			set value   ""
185   			set compare x
186   		    } elseif {$compare eq "!="} {
187   			# string doesn't allow !match
188   			set value   !$value
189   			set compare "=="
190   		    }
191
192   		    if {$type ne "string"} {
193   			# don't let any odd string types sneak in
194   			puts stderr "Reject String: ${file}:$linenum $type - $line"
195   			continue
196   		    }
197   		}
198
199   		regex {
200   		    # I am *not* going to handle regex
201   		    puts stderr "Reject Regex: ${file}:$linenum $type - $line"
202   		    continue
203   		}
204
205   		*byte* - *short* - *long* - *date* {
206   		    # Numeric types
207
208   		    # extract numeric match &qualifiers
209   		    set type [split  $type &]
210   		    set qual [lindex $type 1]
211
212   		    if {$qual ne ""} {
213   			# this is an &-qualifier
214   			set qual &$qual
215   		    } else {
216   			# extract -qualifier from type
217   			set type [split  $type -]
218   			set qual [lindex $type 1]
219   			if {$qual ne ""} {
220   			    set qual -$qual
221   			}
222   		    }
223   		    set type [lindex $type 0]
224
225   		    # perform value adjustments
226   		    if {$compare ne "x"} {
227   			# trim redundant Long value qualifier
228   			set value [string trimright $value L]
229
230   			if {[catch {set value [expr $value]} x]} {
231			    upvar #0 errorInfo eo
232   			    # check that value is representable in tcl
233   			    puts stderr "Reject Value Error: ${file}:$linenum '$value' '$line' - $eo"
234   			    continue;
235   			}
236
237   			# coerce numeric value into hex
238   			set value [format "0x%x" $value]
239   		    }
240   		}
241
242   		default {
243   		    # this is not a type we can handle
244   		    puts stderr "Reject Unknown Type: ${file}:$linenum $type - $line"
245   		    continue
246   		}
247   	    }
248   	}
249
250   	# collect some summaries
251   	::fileutil::magic::cfront::Debug {
252   	    variable types
253   	    set types($type) $type
254   	    variable quals
255   	    set quals($qual) $qual
256   	}
257
258   	#puts $linenum level:$level offset:$offset type:$type
259	#puts qual:$qual compare:$compare value:'$value' message:'$message'
260
261   	# protect hashes in output script message
262   	set message [string map $hashprotectionB $message]
263
264   	if {![string match "(*)" $offset]} {
265   	    catch {set offset [expr $offset]}
266   	}
267
268   	# record is the complete match command,
269   	# encoded for tcl code generation
270   	set record [list $linenum $type $qual $compare $offset $value $message]
271   	if {$script == {}} {
272   	    # the original script has level 0,
273   	    # regardless of what the script says
274   	    set level 0
275   	}
276
277   	if {$level == 0} {
278   	    # add a new 0-level record
279   	    lappend script $record
280   	} else {
281   	    # find the growing edge of the script
282   	    set depth [::struct::list repeat [expr $level] end]
283   	    while {[catch {
284   		# get the insertion point
285   		set insertion [eval [linsert $depth 0 lindex $script]]
286		# 8.5 #	set insertion [lindex $script {*}$depth]
287   	    }]} {
288   		# handle scripts which jump levels,
289   		# reduce depth to current-depth+1
290   		set depth [lreplace $depth end end]
291   	    }
292
293   	    # add the record at the insertion point
294   	    lappend insertion $record
295
296   	    # re-insert the record into its correct position
297   	    eval [linsert [linsert $depth 0 lset script] end $insertion]
298   	    # 8.5 # lset script {*}$depth $insertion
299   	}
300    }
301    #puts "Script: $script"
302    return $script
303}
304
305# compile up magic files or directories of magic files into a single recognizer.
306proc ::fileutil::magic::cfront::compile {args} {
307    set tcl ""
308    set script {}
309    foreach arg $args {
310   	if {[file type $arg] == "directory"} {
311   	    foreach file [glob [file join $arg *]] {
312   		set script1 [process $file]
313		eval [linsert $script1 0 lappend script [list file $file]]
314   		# 8.5 # lappend script [list file $file] {*}$script1
315
316   		#append tcl "magic::file_start $file" \n
317   		#append tcl [run $script1] \n
318   	    }
319   	} else {
320   	    set file $arg
321   	    set script1 [process $file]
322   	     eval [linsert $script1 0 lappend script [list file $file]]
323   	    # 8.5 # lappend script [list file $file] {*}$script1
324
325   	    #append tcl "magic::file_start $file" \n
326   	    #append tcl [run $script1] \n
327   	}
328    }
329
330    #puts stderr $script
331    ::fileutil::magic::cfront::Debug {puts "\# $args"}
332
333    set    t   [2tree $script]
334    set    tcl [treegen $t root]
335    append tcl "\nreturn \{\}"
336
337    ::fileutil::magic::cfront::Debug {puts [treedump $t]}
338    #set tcl [run $script]
339
340    return $tcl
341}
342
343proc ::fileutil::magic::cfront::procdef {procname args} {
344
345    set pspace [namespace qualifiers $procname]
346
347    if {$pspace eq ""} {
348	return -code error "Cannot generate recognizer in the global namespace"
349    }
350
351    set     script {}
352    lappend script "package require fileutil::magic::rt"
353    lappend script "namespace eval [list ${pspace}] \{"
354    lappend script "    namespace import ::fileutil::magic::rt::*"
355    lappend script "\}"
356    lappend script ""
357    lappend script [list proc ${procname} {} \n[eval [linsert $args 0 compile]]\n]
358    return [join $script \n]
359}
360
361proc ::fileutil::magic::cfront::install {args} {
362    foreach arg $args {
363	set path [file tail $arg]
364	eval [procdef ::fileutil::magic::/${path}::run $arg]
365    }
366    return
367}
368
369# ### ### ### ######### ######### #########
370## Internal, debugging.
371
372if {!$::fileutil::magic::cfront::debug} {
373    # This procedure definition is optimized out of using code by the
374    # core bcc. It knows that neither argument checks are required,
375    # nor is anything done. So neither results, nor errors are
376    # possible, a true no-operation.
377    proc ::fileutil::magic::cfront::Debug {args} {}
378
379} else {
380    proc ::fileutil::magic::cfront::Debug {script} {
381	# Run the commands in the debug script. This usually generates
382	# some output. The uplevel is required to ensure the proper
383	# resolution of all variables found in the script.
384	uplevel 1 $script
385	return
386    }
387}
388
389#set script [magic::compile {} /usr/share/misc/file/magic]
390#puts "\# types:[array names magic::types]"
391#puts "\# quals:[array names magic::quals]"
392#puts "Script: $script"
393
394# ### ### ### ######### ######### #########
395## Ready for use.
396# EOF
397