1# cfront.tcl -- 2# 3# Generator frontend for compiler of magic(5) files into recognizers 4# based on the 'rtcore'. Parses magic(5) into a basic 'script'. 5# 6# Copyright (c) 2004-2005 Colin McCormack <coldstore@users.sourceforge.net> 7# Copyright (c) 2005 Andreas Kupries <andreas_kupries@users.sourceforge.net> 8# 9# See the file "license.terms" for information on usage and redistribution 10# of this file, and for a DISCLAIMER OF ALL WARRANTIES. 11# 12# RCS: @(#) $Id: cfront.tcl,v 1.7 2008/03/22 01:10:32 andreas_kupries Exp $ 13 14##### 15# 16# "mime type recognition in pure tcl" 17# http://wiki.tcl.tk/12526 18# 19# Tcl code harvested on: 10 Feb 2005, 04:06 GMT 20# Wiki page last updated: ??? 21# 22##### 23 24# ### ### ### ######### ######### ######### 25## Requirements 26 27package require Tcl 8.4 28 29# file to compile the magic file from magic(5) into a tcl program 30package require fileutil ; # File processing (input) 31package require fileutil::magic::cgen ; # Code generator. 32package require fileutil::magic::rt ; # Runtime (typemap) 33package require struct::list ; # lrepeat. 34 35package provide fileutil::magic::cfront 1.0 36 37# ### ### ### ######### ######### ######### 38## Implementation 39 40namespace eval ::fileutil::magic::cfront { 41 # Configuration flag. (De)activate debugging output. 42 # This is done during initialization. 43 # Changes at runtime have no effect. 44 45 variable debug 0 46 47 # Constants 48 49 variable hashprotection [list "\#" "\\#" \" \\\" \{ \\\{ \} \\\}] ;#" 50 variable hashprotectionB [list "\#" "\\\#" \" \\\" \} \\\} ( \\( ) \\)] ;#" 51 52 # Make backend functionality accessible 53 namespace import ::fileutil::magic::cgen::* 54 55 namespace export compile procdef install 56} 57 58# parse an individual line 59proc ::fileutil::magic::cfront::parseline {line {maxlevel 10000}} { 60 # calculate the line's level 61 set unlevel [string trimleft $line >] 62 set level [expr {[string length $line] - [string length $unlevel]}] 63 if {$level > $maxlevel} { 64 return -code continue "Skip - too high a level" 65 } 66 67 # regexp parse line into (offset, type, value, command) 68 set parse [regexp -expanded -inline {^(\S+)\s+(\S+)\s*((\S|(\B\s))*)\s*(.*)$} $unlevel] 69 if {$parse == {}} { 70 error "Can't parse: '$unlevel'" 71 } 72 73 # unpack parsed line 74 set value "" 75 set command "" 76 foreach {junk offset type value junk1 junk2 command} $parse break 77 78 # handle trailing spaces 79 if {[string index $value end] eq "\\"} { 80 append value " " 81 } 82 if {[string index $command end] eq "\\"} { 83 append command " " 84 } 85 86 if {$value eq ""} { 87 # badly formatted line 88 return -code error "no value" 89 } 90 91 ::fileutil::magic::cfront::Debug { 92 puts "level:$level offset:$offset type:$type value:'$value' command:'$command'" 93 } 94 95 # return the line's fields 96 return [list $level $offset $type $value $command] 97} 98 99# process a magic file 100proc ::fileutil::magic::cfront::process {file {maxlevel 10000}} { 101 variable hashprotection 102 variable hashprotectionB 103 variable level ;# level of line 104 variable linenum ;# line number 105 106 set level 0 107 set script {} 108 109 set linenum 0 110 ::fileutil::foreachLine line $file { 111 incr linenum 112 set line [string trim $line " "] 113 if {[string index $line 0] eq "#"} { 114 continue ;# skip comments 115 } elseif {$line == ""} { 116 continue ;# skip blank lines 117 } else { 118 # parse line 119 if {[catch {parseline $line $maxlevel} parsed]} { 120 continue ;# skip erroring lines 121 } 122 123 # got a valid line 124 foreach {level offset type value message} $parsed break 125 126 # strip comparator out of value field, 127 # (they are combined) 128 set compare [string index $value 0] 129 switch -glob -- $value { 130 [<>]=* { 131 set compare [string range $value 0 1] 132 set value [string range $value 2 end] 133 } 134 135 <* - >* - &* - ^* { 136 set value [string range $value 1 end] 137 } 138 139 =* { 140 set compare "==" 141 set value [string range $value 1 end] 142 } 143 144 !* { 145 set compare "!=" 146 set value [string range $value 1 end] 147 } 148 149 x { 150 # this is the 'don't care' match 151 # used for collecting values 152 set value "" 153 } 154 155 default { 156 # the default comparator is equals 157 set compare "==" 158 if {[string match {\\[<!>=]*} $value]} { 159 set value [string range $value 1 end] 160 } 161 } 162 } 163 164 # process type field 165 set qual "" 166 switch -glob -- $type { 167 pstring* - string* { 168 # String or Pascal string type 169 170 # extract string match qualifiers 171 foreach {type qual} [split $type /] break 172 173 # convert pstring to string + qualifier 174 if {$type eq "pstring"} { 175 append qual "p" 176 set type "string" 177 } 178 179 # protect hashes in output script value 180 set value [string map $hashprotection $value] 181 182 if {($value eq "\\0") && ($compare eq ">")} { 183 # record 'any string' match 184 set value "" 185 set compare x 186 } elseif {$compare eq "!="} { 187 # string doesn't allow !match 188 set value !$value 189 set compare "==" 190 } 191 192 if {$type ne "string"} { 193 # don't let any odd string types sneak in 194 puts stderr "Reject String: ${file}:$linenum $type - $line" 195 continue 196 } 197 } 198 199 regex { 200 # I am *not* going to handle regex 201 puts stderr "Reject Regex: ${file}:$linenum $type - $line" 202 continue 203 } 204 205 *byte* - *short* - *long* - *date* { 206 # Numeric types 207 208 # extract numeric match &qualifiers 209 set type [split $type &] 210 set qual [lindex $type 1] 211 212 if {$qual ne ""} { 213 # this is an &-qualifier 214 set qual &$qual 215 } else { 216 # extract -qualifier from type 217 set type [split $type -] 218 set qual [lindex $type 1] 219 if {$qual ne ""} { 220 set qual -$qual 221 } 222 } 223 set type [lindex $type 0] 224 225 # perform value adjustments 226 if {$compare ne "x"} { 227 # trim redundant Long value qualifier 228 set value [string trimright $value L] 229 230 if {[catch {set value [expr $value]} x]} { 231 upvar #0 errorInfo eo 232 # check that value is representable in tcl 233 puts stderr "Reject Value Error: ${file}:$linenum '$value' '$line' - $eo" 234 continue; 235 } 236 237 # coerce numeric value into hex 238 set value [format "0x%x" $value] 239 } 240 } 241 242 default { 243 # this is not a type we can handle 244 puts stderr "Reject Unknown Type: ${file}:$linenum $type - $line" 245 continue 246 } 247 } 248 } 249 250 # collect some summaries 251 ::fileutil::magic::cfront::Debug { 252 variable types 253 set types($type) $type 254 variable quals 255 set quals($qual) $qual 256 } 257 258 #puts $linenum level:$level offset:$offset type:$type 259 #puts qual:$qual compare:$compare value:'$value' message:'$message' 260 261 # protect hashes in output script message 262 set message [string map $hashprotectionB $message] 263 264 if {![string match "(*)" $offset]} { 265 catch {set offset [expr $offset]} 266 } 267 268 # record is the complete match command, 269 # encoded for tcl code generation 270 set record [list $linenum $type $qual $compare $offset $value $message] 271 if {$script == {}} { 272 # the original script has level 0, 273 # regardless of what the script says 274 set level 0 275 } 276 277 if {$level == 0} { 278 # add a new 0-level record 279 lappend script $record 280 } else { 281 # find the growing edge of the script 282 set depth [::struct::list repeat [expr $level] end] 283 while {[catch { 284 # get the insertion point 285 set insertion [eval [linsert $depth 0 lindex $script]] 286 # 8.5 # set insertion [lindex $script {*}$depth] 287 }]} { 288 # handle scripts which jump levels, 289 # reduce depth to current-depth+1 290 set depth [lreplace $depth end end] 291 } 292 293 # add the record at the insertion point 294 lappend insertion $record 295 296 # re-insert the record into its correct position 297 eval [linsert [linsert $depth 0 lset script] end $insertion] 298 # 8.5 # lset script {*}$depth $insertion 299 } 300 } 301 #puts "Script: $script" 302 return $script 303} 304 305# compile up magic files or directories of magic files into a single recognizer. 306proc ::fileutil::magic::cfront::compile {args} { 307 set tcl "" 308 set script {} 309 foreach arg $args { 310 if {[file type $arg] == "directory"} { 311 foreach file [glob [file join $arg *]] { 312 set script1 [process $file] 313 eval [linsert $script1 0 lappend script [list file $file]] 314 # 8.5 # lappend script [list file $file] {*}$script1 315 316 #append tcl "magic::file_start $file" \n 317 #append tcl [run $script1] \n 318 } 319 } else { 320 set file $arg 321 set script1 [process $file] 322 eval [linsert $script1 0 lappend script [list file $file]] 323 # 8.5 # lappend script [list file $file] {*}$script1 324 325 #append tcl "magic::file_start $file" \n 326 #append tcl [run $script1] \n 327 } 328 } 329 330 #puts stderr $script 331 ::fileutil::magic::cfront::Debug {puts "\# $args"} 332 333 set t [2tree $script] 334 set tcl [treegen $t root] 335 append tcl "\nreturn \{\}" 336 337 ::fileutil::magic::cfront::Debug {puts [treedump $t]} 338 #set tcl [run $script] 339 340 return $tcl 341} 342 343proc ::fileutil::magic::cfront::procdef {procname args} { 344 345 set pspace [namespace qualifiers $procname] 346 347 if {$pspace eq ""} { 348 return -code error "Cannot generate recognizer in the global namespace" 349 } 350 351 set script {} 352 lappend script "package require fileutil::magic::rt" 353 lappend script "namespace eval [list ${pspace}] \{" 354 lappend script " namespace import ::fileutil::magic::rt::*" 355 lappend script "\}" 356 lappend script "" 357 lappend script [list proc ${procname} {} \n[eval [linsert $args 0 compile]]\n] 358 return [join $script \n] 359} 360 361proc ::fileutil::magic::cfront::install {args} { 362 foreach arg $args { 363 set path [file tail $arg] 364 eval [procdef ::fileutil::magic::/${path}::run $arg] 365 } 366 return 367} 368 369# ### ### ### ######### ######### ######### 370## Internal, debugging. 371 372if {!$::fileutil::magic::cfront::debug} { 373 # This procedure definition is optimized out of using code by the 374 # core bcc. It knows that neither argument checks are required, 375 # nor is anything done. So neither results, nor errors are 376 # possible, a true no-operation. 377 proc ::fileutil::magic::cfront::Debug {args} {} 378 379} else { 380 proc ::fileutil::magic::cfront::Debug {script} { 381 # Run the commands in the debug script. This usually generates 382 # some output. The uplevel is required to ensure the proper 383 # resolution of all variables found in the script. 384 uplevel 1 $script 385 return 386 } 387} 388 389#set script [magic::compile {} /usr/share/misc/file/magic] 390#puts "\# types:[array names magic::types]" 391#puts "\# quals:[array names magic::quals]" 392#puts "Script: $script" 393 394# ### ### ### ######### ######### ######### 395## Ready for use. 396# EOF 397