msdos revision 290152
1
2#------------------------------------------------------------------------------
3# $File: msdos,v 1.101 2015/08/24 05:08:48 christos Exp $
4# msdos:  file(1) magic for MS-DOS files
5#
6
7# .BAT files (Daniel Quinlan, quinlan@yggdrasil.com)
8# updated by Joerg Jenderek at Oct 2008,Apr 2011
90	string/t	@			
10>1	string/cW	\ echo\ off	DOS batch file text
11!:mime	text/x-msdos-batch
12>1	string/cW	echo\ off	DOS batch file text
13!:mime	text/x-msdos-batch
14>1	string/cW	rem		DOS batch file text
15!:mime	text/x-msdos-batch
16>1	string/cW	set\ 		DOS batch file text
17!:mime	text/x-msdos-batch
18
19
20# OS/2 batch files are REXX. the second regex is a bit generic, oh well
21# the matched commands seem to be common in REXX and uncommon elsewhere
22100	search/0xffff   rxfuncadd
23>100	regex/c =^[\ \t]{0,10}call[\ \t]{1,10}rxfunc	OS/2 REXX batch file text
24100	search/0xffff   say
25>100	regex/c =^[\ \t]{0,10}say\ ['"]			OS/2 REXX batch file text
26
270	leshort		0x14c	MS Windows COFF Intel 80386 object file
28#>4	ledate		x	stamp %s
290	leshort		0x166	MS Windows COFF MIPS R4000 object file
30#>4	ledate		x	stamp %s
310	leshort		0x184	MS Windows COFF Alpha object file
32#>4	ledate		x	stamp %s
330	leshort		0x268	MS Windows COFF Motorola 68000 object file
34#>4	ledate		x	stamp %s
350	leshort		0x1f0	MS Windows COFF PowerPC object file
36#>4	ledate		x	stamp %s
370	leshort		0x290	MS Windows COFF PA-RISC object file
38#>4	ledate		x	stamp %s
39
40# Tests for various EXE types.
41#
42# Many of the compressed formats were extracted from IDARC 1.23 source code.
43#
440	string/b	MZ
45# All non-DOS EXE extensions have the relocation table more than 0x40 bytes into the file.
46>0x18	leshort <0x40 MS-DOS executable
47!:mime	application/x-dosexec
48# These traditional tests usually work but not always.  When test quality support is
49# implemented these can be turned on.
50#>>0x18	leshort	0x1c	(Borland compiler)
51#>>0x18	leshort	0x1e	(MS compiler)
52
53# If the relocation table is 0x40 or more bytes into the file, it's definitely
54# not a DOS EXE.
55>0x18  leshort >0x3f
56
57# Maybe it's a PE?
58>>(0x3c.l) string PE\0\0 PE
59!:mime	application/x-dosexec
60>>>(0x3c.l+24)	leshort		0x010b	\b32 executable
61>>>(0x3c.l+24)	leshort		0x020b	\b32+ executable
62>>>(0x3c.l+24)	leshort		0x0107	ROM image
63>>>(0x3c.l+24)	default		x	Unknown PE signature
64>>>>&0 		leshort		x	0x%x
65>>>(0x3c.l+22)	leshort&0x2000	>0	(DLL)
66>>>(0x3c.l+92)	leshort		1	(native)
67>>>(0x3c.l+92)	leshort		2	(GUI)
68>>>(0x3c.l+92)	leshort		3	(console)
69>>>(0x3c.l+92)	leshort		7	(POSIX)
70>>>(0x3c.l+92)	leshort		9	(Windows CE)
71>>>(0x3c.l+92)	leshort		10	(EFI application)
72>>>(0x3c.l+92)	leshort		11	(EFI boot service driver)
73>>>(0x3c.l+92)	leshort		12	(EFI runtime driver)
74>>>(0x3c.l+92)	leshort		13	(EFI ROM)
75>>>(0x3c.l+92)	leshort		14	(XBOX)
76>>>(0x3c.l+92)	leshort		15	(Windows boot application)
77>>>(0x3c.l+92)	default		x	(Unknown subsystem
78>>>>&0		leshort		x	0x%x)
79>>>(0x3c.l+4)	leshort		0x14c	Intel 80386
80>>>(0x3c.l+4)	leshort		0x166	MIPS R4000
81>>>(0x3c.l+4)	leshort		0x168	MIPS R10000
82>>>(0x3c.l+4)	leshort		0x184	Alpha
83>>>(0x3c.l+4)	leshort		0x1a2	Hitachi SH3
84>>>(0x3c.l+4)	leshort		0x1a6	Hitachi SH4
85>>>(0x3c.l+4)	leshort		0x1c0	ARM
86>>>(0x3c.l+4)	leshort		0x1c2	ARM Thumb
87>>>(0x3c.l+4)	leshort		0x1c4	ARMv7 Thumb
88>>>(0x3c.l+4)	leshort		0x1f0	PowerPC
89>>>(0x3c.l+4)	leshort		0x200	Intel Itanium
90>>>(0x3c.l+4)	leshort		0x266	MIPS16
91>>>(0x3c.l+4)	leshort		0x268	Motorola 68000
92>>>(0x3c.l+4)	leshort		0x290	PA-RISC
93>>>(0x3c.l+4)	leshort		0x366	MIPSIV
94>>>(0x3c.l+4)	leshort		0x466	MIPS16 with FPU
95>>>(0x3c.l+4)	leshort		0xebc	EFI byte code
96>>>(0x3c.l+4)	leshort		0x8664	x86-64
97>>>(0x3c.l+4)	leshort		0xc0ee	MSIL
98>>>(0x3c.l+4)	default		x	Unknown processor type
99>>>>&0		leshort		x	0x%x
100>>>(0x3c.l+22)	leshort&0x0200	>0	(stripped to external PDB)
101>>>(0x3c.l+22)	leshort&0x1000	>0	system file
102>>>(0x3c.l+24)	leshort		0x010b
103>>>>(0x3c.l+232) lelong	>0	Mono/.Net assembly
104>>>(0x3c.l+24)	leshort		0x020b
105>>>>(0x3c.l+248) lelong	>0	Mono/.Net assembly
106
107# hooray, there's a DOS extender using the PE format, with a valid PE
108# executable inside (which just prints a message and exits if run in win)
109>>>(8.s*16)		string		32STUB	\b, 32rtm DOS extender
110>>>(8.s*16)		string		!32STUB	\b, for MS Windows
111>>>(0x3c.l+0xf8)	string		UPX0 \b, UPX compressed
112>>>(0x3c.l+0xf8)	search/0x140	PEC2 \b, PECompact2 compressed
113>>>(0x3c.l+0xf8)	search/0x140	UPX2
114>>>>(&0x10.l+(-4))	string		PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
115>>>(0x3c.l+0xf8)	search/0x140	.idata
116>>>>(&0xe.l+(-4))	string		PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
117>>>>(&0xe.l+(-4))	string		ZZ0 \b, ZZip self-extracting archive
118>>>>(&0xe.l+(-4))	string		ZZ1 \b, ZZip self-extracting archive
119>>>(0x3c.l+0xf8)	search/0x140	.rsrc
120>>>>(&0x0f.l+(-4))	string		a\\\4\5 \b, WinHKI self-extracting archive
121>>>>(&0x0f.l+(-4))	string		Rar! \b, RAR self-extracting archive
122>>>>(&0x0f.l+(-4))	search/0x3000	MSCF \b, InstallShield self-extracting archive
123>>>>(&0x0f.l+(-4))	search/32	Nullsoft \b, Nullsoft Installer self-extracting archive
124>>>(0x3c.l+0xf8)	search/0x140	.data
125>>>>(&0x0f.l)		string		WEXTRACT \b, MS CAB-Installer self-extracting archive
126>>>(0x3c.l+0xf8)	search/0x140	.petite\0 \b, Petite compressed
127>>>>(0x3c.l+0xf7)	byte		x
128>>>>>(&0x104.l+(-4))	string		=!sfx! \b, ACE self-extracting archive
129>>>(0x3c.l+0xf8)	search/0x140	.WISE \b, WISE installer self-extracting archive
130>>>(0x3c.l+0xf8)	search/0x140	.dz\0\0\0 \b, Dzip self-extracting archive
131>>>&(0x3c.l+0xf8)	search/0x100	_winzip_ \b, ZIP self-extracting archive (WinZip)
132>>>&(0x3c.l+0xf8)	search/0x100	SharedD \b, Microsoft Installer self-extracting archive
133>>>0x30			string		Inno \b, InnoSetup self-extracting archive
134
135# Hmm, not a PE but the relocation table is too high for a traditional DOS exe,
136# must be one of the unusual subformats.
137>>(0x3c.l) string !PE\0\0 MS-DOS executable
138!:mime	application/x-dosexec
139
140>>(0x3c.l)		string		NE \b, NE
141!:mime	application/x-dosexec
142>>>(0x3c.l+0x36)	byte		1 for OS/2 1.x
143>>>(0x3c.l+0x36)	byte		2 for MS Windows 3.x
144>>>(0x3c.l+0x36)	byte		3 for MS-DOS
145>>>(0x3c.l+0x36)	byte		4 for Windows 386
146>>>(0x3c.l+0x36)	byte		5 for Borland Operating System Services
147>>>(0x3c.l+0x36)	default		x
148>>>>(0x3c.l+0x36)	byte		x (unknown OS %x)
149>>>(0x3c.l+0x36)	byte		0x81 for MS-DOS, Phar Lap DOS extender
150>>>(0x3c.l+0x0c)	leshort&0x8003	0x8002 (DLL)
151>>>(0x3c.l+0x0c)	leshort&0x8003	0x8001 (driver)
152>>>&(&0x24.s-1)		string		ARJSFX \b, ARJ self-extracting archive
153>>>(0x3c.l+0x70)	search/0x80	WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip)
154
155>>(0x3c.l)		string		LX\0\0 \b, LX
156!:mime	application/x-dosexec
157>>>(0x3c.l+0x0a)	leshort		<1 (unknown OS)
158>>>(0x3c.l+0x0a)	leshort		1 for OS/2
159>>>(0x3c.l+0x0a)	leshort		2 for MS Windows
160>>>(0x3c.l+0x0a)	leshort		3 for DOS
161>>>(0x3c.l+0x0a)	leshort		>3 (unknown OS)
162>>>(0x3c.l+0x10)	lelong&0x28000	=0x8000 (DLL)
163>>>(0x3c.l+0x10)	lelong&0x20000	>0 (device driver)
164>>>(0x3c.l+0x10)	lelong&0x300	0x300 (GUI)
165>>>(0x3c.l+0x10)	lelong&0x28300	<0x300 (console)
166>>>(0x3c.l+0x08)	leshort		1 i80286
167>>>(0x3c.l+0x08)	leshort		2 i80386
168>>>(0x3c.l+0x08)	leshort		3 i80486
169>>>(8.s*16)		string		emx \b, emx
170>>>>&1			string		x %s
171>>>&(&0x54.l-3)		string		arjsfx \b, ARJ self-extracting archive
172
173# MS Windows system file, supposedly a collection of LE executables
174>>(0x3c.l)		string		W3 \b, W3 for MS Windows
175!:mime	application/x-dosexec
176
177>>(0x3c.l)		string		LE\0\0 \b, LE executable
178!:mime	application/x-dosexec
179>>>(0x3c.l+0x0a)	leshort		1
180# some DOS extenders use LE files with OS/2 header
181>>>>0x240		search/0x100	DOS/4G for MS-DOS, DOS4GW DOS extender
182>>>>0x240		search/0x200	WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender
183>>>>0x440		search/0x100	CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender
184>>>>0x40		search/0x40	PMODE/W for MS-DOS, PMODE/W DOS extender
185>>>>0x40		search/0x40	STUB/32A for MS-DOS, DOS/32A DOS extender (stub)
186>>>>0x40		search/0x80	STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub)
187>>>>0x40		search/0x80	DOS/32A for MS-DOS, DOS/32A DOS extender (embedded)
188# this is a wild guess; hopefully it is a specific signature
189>>>>&0x24		lelong		<0x50
190>>>>>(&0x4c.l)		string		\xfc\xb8WATCOM
191>>>>>>&0		search/8	3\xdbf\xb9 \b, 32Lite compressed
192# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP
193#>>>>(0x3c.l+0x1c)	lelong		>0x10000 for OS/2
194# fails with DOS-Extenders.
195>>>(0x3c.l+0x0a)	leshort		2 for MS Windows
196>>>(0x3c.l+0x0a)	leshort		3 for DOS
197>>>(0x3c.l+0x0a)	leshort		4 for MS Windows (VxD)
198>>>(&0x7c.l+0x26)	string		UPX \b, UPX compressed
199>>>&(&0x54.l-3)		string		UNACE \b, ACE self-extracting archive
200
201# looks like ASCII, probably some embedded copyright message.
202# and definitely not NE/LE/LX/PE
203>>0x3c		lelong	>0x20000000
204>>>(4.s*512)	leshort !0x014c \b, MZ for MS-DOS
205!:mime	application/x-dosexec
206# header data too small for extended executable
207>2		long	!0
208>>0x18		leshort <0x40
209>>>(4.s*512)	leshort !0x014c
210
211>>>>&(2.s-514)	string	!LE
212>>>>>&-2	string	!BW \b, MZ for MS-DOS
213!:mime	application/x-dosexec
214>>>>&(2.s-514)	string	LE \b, LE
215>>>>>0x240	search/0x100	DOS/4G for MS-DOS, DOS4GW DOS extender
216# educated guess since indirection is still not capable enough for complex offset
217# calculations (next embedded executable would be at &(&2*512+&0-2))
218# I suspect there are only LE executables in these multi-exe files
219>>>>&(2.s-514)	string	BW
220>>>>>0x240	search/0x100	DOS/4G	\b, LE for MS-DOS, DOS4GW DOS extender (embedded)
221>>>>>0x240	search/0x100	!DOS/4G	\b, BW collection for MS-DOS
222
223# This sequence skips to the first COFF segment, usually .text
224>(4.s*512)	leshort		0x014c \b, COFF
225!:mime	application/x-dosexec
226>>(8.s*16)	string		go32stub for MS-DOS, DJGPP go32 DOS extender
227>>(8.s*16)	string		emx
228>>>&1		string		x for DOS, Win or OS/2, emx %s
229>>&(&0x42.l-3)	byte		x 
230>>>&0x26	string		UPX \b, UPX compressed
231# and yet another guess: a small .text followed by a large .data is unusual, could be 32lite
232>>&0x2c		search/0xa0	.text
233>>>&0x0b	lelong		<0x2000
234>>>>&0		lelong		>0x6000 \b, 32lite compressed
235
236>(8.s*16) string $WdX \b, WDos/X DOS extender
237
238# By now an executable type should have been printed out.  The executable
239# may be a self-uncompressing archive, so look for evidence of that and 
240# print it out.  
241#
242# Some signatures below from Greg Roelofs, newt@uchicago.edu.
243#
244>0x35	string	\x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed
245>0xe7	string	LH/2\ 	Self-Extract \b, %s
246>0x1c	string	UC2X	\b, UCEXE compressed
247>0x1c	string	WWP\ 	\b, WWPACK compressed
248>0x1c	string	RJSX 	\b, ARJ self-extracting archive
249>0x1c	string	diet 	\b, diet compressed
250>0x1c	string	LZ09 	\b, LZEXE v0.90 compressed
251>0x1c	string	LZ91 	\b, LZEXE v0.91 compressed
252>0x1c	string	tz 	\b, TinyProg compressed
253>0x1e	string	Copyright\ 1989-1990\ PKWARE\ Inc.	Self-extracting PKZIP archive
254!:mime	application/zip
255# Yes, this really is "Copr", not "Corp."
256>0x1e	string	PKLITE\ Copr.	Self-extracting PKZIP archive
257!:mime	application/zip
258# winarj stores a message in the stub instead of the sig in the MZ header
259>0x20	search/0xe0	aRJsfX \b, ARJ self-extracting archive
260>0x20	string AIN
261>>0x23	string 2	\b, AIN 2.x compressed
262>>0x23	string <2	\b, AIN 1.x compressed
263>>0x23	string >2	\b, AIN 1.x compressed
264>0x24	string	LHa's\ SFX \b, LHa self-extracting archive
265!:mime	application/x-lha
266>0x24	string	LHA's\ SFX \b, LHa self-extracting archive
267!:mime	application/x-lha
268>0x24	string	\ $ARX \b, ARX self-extracting archive
269>0x24	string	\ $LHarc \b, LHarc self-extracting archive
270>0x20	string	SFX\ by\ LARC \b, LARC self-extracting archive
271>0x40	string aPKG \b, aPackage self-extracting archive
272>0x64	string	W\ Collis\0\0 \b, Compack compressed
273>0x7a	string		Windows\ self-extracting\ ZIP	\b, ZIP self-extracting archive
274>>&0xf4 search/0x140 \x0\x40\x1\x0
275>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive
276>1638	string	-lh5- \b, LHa self-extracting archive v2.13S
277>0x17888 string Rar! \b, RAR self-extracting archive
278
279# Skip to the end of the EXE.  This will usually work fine in the PE case
280# because the MZ image is hardcoded into the toolchain and almost certainly
281# won't match any of these signatures.
282>(4.s*512)	long	x 
283>>&(2.s-517)	byte	x 
284>>>&0	string		PK\3\4 \b, ZIP self-extracting archive
285>>>&0	string		Rar! \b, RAR self-extracting archive
286>>>&0	string		=!\x11 \b, AIN 2.x self-extracting archive
287>>>&0	string		=!\x12 \b, AIN 2.x self-extracting archive
288>>>&0	string		=!\x17 \b, AIN 1.x self-extracting archive
289>>>&0	string		=!\x18 \b, AIN 1.x self-extracting archive
290>>>&7	search/400	**ACE** \b, ACE self-extracting archive
291>>>&0	search/0x480	UC2SFX\ Header \b, UC2 self-extracting archive
292
293# a few unknown ZIP sfxes, no idea if they are needed or if they are
294# already captured by the generic patterns above
295>(8.s*16)	search/0x20	PKSFX \b, ZIP self-extracting archive (PKZIP)
296# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive
297#
298
299# TELVOX Teleinformatica CODEC self-extractor for OS/2:
300>49801	string	\x79\xff\x80\xff\x76\xff	\b, CODEC archive v3.21
301>>49824 leshort		=1			\b, 1 file
302>>49824 leshort		>1			\b, %u files
303
304# added by Joerg Jenderek of http://www.freedos.org/software/?prog=kc
305# and http://www.freedos.org/software/?prog=kpdos
306# for FreeDOS files like KEYBOARD.SYS, KEYBRD2.SYS, KEYBRD3.SYS, *.KBD
3070	string/b	KCF		FreeDOS KEYBoard Layout collection
308# only version=0x100 found
309>3	uleshort	x		\b, version 0x%x
310# length of string containing author,info and special characters
311>6	ubyte		>0		
312#>>6	pstring		x		\b, name=%s
313>>7	string		>\0		\b, author=%-.14s
314>>7	search/254	\xff		\b, info=
315#>>>&0	string		x		\b%-s
316>>>&0	string		x		\b%-.15s
317# for FreeDOS *.KL files 
3180	string/b	KLF		FreeDOS KEYBoard Layout file
319# only version=0x100 or 0x101 found
320>3	uleshort	x		\b, version 0x%x
321# stringlength
322>5	ubyte		>0		
323>>8	string		x		\b, name=%-.2s
3240	string	\xffKEYB\ \ \ \0\0\0\0	
325>12	string	\0\0\0\0`\004\360	MS-DOS KEYBoard Layout file
326
327# .COM formats (Daniel Quinlan, quinlan@yggdrasil.com)
328# Uncommenting only the first two lines will cover about 2/3 of COM files,
329# but it isn't feasible to match all COM files since there must be at least
330# two dozen different one-byte "magics".
331# test too generic ?
3320	byte		0xe9		DOS executable (COM)
333>0x1FE leshort		0xAA55		\b, boot code
334>6	string		SFX\ of\ LHarc	(%s)
335
336# DOS device driver updated by Joerg Jenderek at May 2011
337# http://maben.homeip.net/static/S100/IBM/software/DOS/DOS%20techref/CHAPTER.009
3380	ulequad&0x07a0ffffffff		0xffffffff		DOS executable (
339>40	search/7			UPX!			\bUPX compressed 
340# DOS device driver attributes
341>4	uleshort&0x8000			0x0000			\bblock device driver
342# character device
343>4	uleshort&0x8000			0x8000			\b
344>>4	uleshort&0x0008			0x0008			\bclock 
345# fast video output by int 29h
346>>4	uleshort&0x0010			0x0010			\bfast 
347# standard input/output device
348>>4	uleshort&0x0003			>0			\bstandard 
349>>>4	uleshort&0x0001			0x0001			\binput
350>>>4	uleshort&0x0003			0x0003			\b/
351>>>4	uleshort&0x0002			0x0002			\boutput 
352>>4	uleshort&0x8000			0x8000			\bcharacter device driver
353>0	ubyte				x			
354# upx compressed device driver has garbage instead of the real name in name field of header
355>>40	search/7			UPX!			
356>>40	default				x			
357# leading/trailing nulls, zeros or non ASCII characters in 8-byte name field at offset 10 are skipped
358>>>12		ubyte			>0x27			\b 
359>>>>10		ubyte			>0x20			
360>>>>>10		ubyte			!0x2E			
361>>>>>>10	ubyte			!0x2A			\b%c
362>>>>11		ubyte			>0x20			
363>>>>>11		ubyte			!0x2E			\b%c
364>>>>12		ubyte			>0x20			
365>>>>>12		ubyte			!0x39			
366>>>>>>12	ubyte			!0x2E			\b%c
367>>>13		ubyte			>0x20			
368>>>>13		ubyte			!0x2E			\b%c
369>>>>14		ubyte			>0x20			
370>>>>>14		ubyte			!0x2E			\b%c
371>>>>15		ubyte			>0x20			
372>>>>>15		ubyte			!0x2E			\b%c
373>>>>16		ubyte			>0x20			
374>>>>>16		ubyte			!0x2E			
375>>>>>>16	ubyte			<0xCB			\b%c
376>>>>17		ubyte			>0x20			
377>>>>>17		ubyte			!0x2E			
378>>>>>>17	ubyte			<0x90			\b%c
379# some character device drivers like ASPICD.SYS, btcdrom.sys and Cr_atapi.sys contain only spaces or points in name field
380>>>4		uleshort&0x8000		0x8000			
381>>>>12		ubyte			<0x2F			
382# they have their real name at offset 22
383>>>>>22		string			>\0			\b%-.5s
384>4	uleshort&0x8000			0x0000			
385# 32 bit sector addressing ( > 32 MB) for block devices
386>>4	uleshort&0x0002			0x0002			\b,32-bit sector-
387# support by driver functions 13h, 17h, 18h
388>4	uleshort&0x0040			0x0040			\b,IOCTL-
389# open, close, removable media support by driver functions 0Dh, 0Eh, 0Fh
390>4	uleshort&0x0800			0x0800			\b,close media-
391# output until busy support by int 10h for character device driver
392>4	uleshort&0x8000			0x8000			
393>>4	uleshort&0x2000			0x2000			\b,until busy-
394# direct read/write support by driver functions 03h,0Ch
395>4	uleshort&0x4000			0x4000			\b,control strings-
396>4	uleshort&0x8000			0x8000			
397>>4	uleshort&0x6840			>0			\bsupport
398>4	uleshort&0x8000			0x0000			
399>>4	uleshort&0x4842			>0			\bsupport
400>0	ubyte				x			\b)
401# DOS driver cmd640x.sys has 0x12 instead of 0xffffffff for pointer field to next device header 
402# Too weak, matches files that only contain 0's
403#0	ulequad&0x000007a0ffffffed	0x0000000000000000	DOS-executable (
404#>4	uleshort&0x8000			0x8000			\bcharacter device driver
405#>>10	string				x			%-.8s
406#>4	uleshort&0x4000			0x4000			\b,control strings-support)
407
408# test too generic ?
4090	byte		0x8c		DOS executable (COM)
410# updated by Joerg Jenderek at Oct 2008
4110	ulelong		0xffff10eb	DR-DOS executable (COM)
412# byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
4130	ubeshort&0xeb8d	>0xeb00		
414# DR-DOS STACKER.COM SCREATE.SYS missed
415>0	byte		0xeb
416>>0x1FE leshort		0xAA55		DOS executable (COM), boot code
417>>85	string		UPX		DOS executable (COM), UPX compressed
418>>4	string		\ $ARX		DOS executable (COM), ARX self-extracting archive
419>>4	string		\ $LHarc	DOS executable (COM), LHarc self-extracting archive
420>>0x20e string		SFX\ by\ LARC	DOS executable (COM), LARC self-extracting archive
421# updated by Joerg Jenderek at Oct 2008
422#0	byte		0xb8		COM executable
4230	uleshort&0x80ff	0x00b8		
424# modified by Joerg Jenderek
425>1	lelong		!0x21cd4cff	COM executable for DOS
426# http://syslinux.zytor.com/comboot.php
427# (32-bit COMBOOT) programs *.C32 contain 32-bit code and run in flat-memory 32-bit protected mode
428# start with assembler instructions mov eax,21cd4cffh
4290	uleshort&0xc0ff	0xc0b8		
430>1	lelong		0x21cd4cff	COM executable (32-bit COMBOOT)
431# syslinux:doc/comboot.txt
432# A COM32R program must start with the byte sequence B8 FE 4C CD 21 (mov
433# eax,21cd4cfeh) as a magic number.
4340       string/b	\xb8\xfe\x4c\xcd\x21	COM executable (COM32R)
435# start with assembler instructions mov eax,21cd4cfeh
4360	uleshort&0xc0ff	0xc0b8		
437>1	lelong		0x21cd4cfe	COM executable (32-bit COMBOOT, relocatable)
4380	string/b	\x81\xfc		
439>4	string	\x77\x02\xcd\x20\xb9	
440>>36	string	UPX!			FREE-DOS executable (COM), UPX compressed
441252	string Must\ have\ DOS\ version DR-DOS executable (COM)
442# added by Joerg Jenderek at Oct 2008
443# GRR search is not working
444#34	search/2	UPX!		FREE-DOS executable (COM), UPX compressed
44534	string	UPX!			FREE-DOS executable (COM), UPX compressed
44635	string	UPX!			FREE-DOS executable (COM), UPX compressed
447# GRR search is not working
448#2	search/28	\xcd\x21	COM executable for MS-DOS
449#WHICHFAT.cOM
4502	string	\xcd\x21		COM executable for DOS
451#DELTREE.cOM DELTREE2.cOM
4524	string	\xcd\x21		COM executable for DOS
453#IFMEMDSK.cOM ASSIGN.cOM COMP.cOM
4545	string	\xcd\x21		COM executable for DOS
455#DELTMP.COm HASFAT32.cOM
4567	string	\xcd\x21		
457>0	byte	!0xb8			COM executable for DOS
458#COMP.cOM MORE.COm
45910	string	\xcd\x21		
460>5	string	!\xcd\x21		COM executable for DOS
461#comecho.com
46213	string	\xcd\x21		COM executable for DOS
463#HELP.COm EDIT.coM
46418	string	\xcd\x21		COM executable for MS-DOS
465#NWRPLTRM.COm
46623	string	\xcd\x21		COM executable for MS-DOS
467#LOADFIX.cOm LOADFIX.cOm
46830	string	\xcd\x21		COM executable for MS-DOS
469#syslinux.com 3.11
47070	string	\xcd\x21		COM executable for DOS
471# many compressed/converted COMs start with a copy loop instead of a jump
4720x6	search/0xa	\xfc\x57\xf3\xa5\xc3	COM executable for MS-DOS
4730x6	search/0xa	\xfc\x57\xf3\xa4\xc3	COM executable for DOS
474>0x18	search/0x10	\x50\xa4\xff\xd5\x73	\b, aPack compressed
4750x3c	string		W\ Collis\0\0		COM executable for MS-DOS, Compack compressed
476# FIXME: missing diet .com compression
477
478# miscellaneous formats
4790	string/b	LZ		MS-DOS executable (built-in)
480#0	byte		0xf0		MS-DOS program library data
481#
482
483# AAF files:
484# <stuartc@rd.bbc.co.uk> Stuart Cunningham
4850	string/b	\320\317\021\340\241\261\032\341AAFB\015\000OM\006\016\053\064\001\001\001\377			AAF legacy file using MS Structured Storage
486>30	byte	9		(512B sectors)
487>30	byte	12		(4kB sectors)
4880	string/b	\320\317\021\340\241\261\032\341\001\002\001\015\000\002\000\000\006\016\053\064\003\002\001\001			AAF file using MS Structured Storage
489>30	byte	9		(512B sectors)
490>30	byte	12		(4kB sectors)
491
492# Popular applications
4932080	string	Microsoft\ Word\ 6.0\ Document	%s
494!:mime	application/msword
4952080	string	Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data
496!:mime	application/msword
497# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Word)
4982112	string	MSWordDoc			Microsoft Word document data
499!:mime	application/msword
500#
5010	belong	0x31be0000			Microsoft Word Document
502!:mime	application/msword
503#
5040	string/b	PO^Q`				Microsoft Word 6.0 Document
505!:mime	application/msword
506#
5070	string/b	\376\067\0\043			Microsoft Office Document
508!:mime	application/msword
5090	string/b	\333\245-\0\0\0			Microsoft Office Document
510!:mime	application/msword
511512	string/b	\354\245\301			Microsoft Word Document
512!:mime	application/msword
513
514#
5150	string/b	\xDB\xA5\x2D\x00		Microsoft WinWord 2.0 Document
516!:mime application/msword
517#
5182080	string	Microsoft\ Excel\ 5.0\ Worksheet	%s
519!:mime	application/vnd.ms-excel
520#
5210	string/b	\xDB\xA5\x2D\x00		Microsoft WinWord 2.0 Document
522!:mime application/msword
523
5242080	string	Foglio\ di\ lavoro\ Microsoft\ Exce	%s
525!:mime	application/vnd.ms-excel
526#
527# Pawel Wiecek <coven@i17linuxb.ists.pwr.wroc.pl> (for polish Excel)
5282114	string	Biff5		Microsoft Excel 5.0 Worksheet
529!:mime	application/vnd.ms-excel
530# Italian MS-Excel
5312121	string	Biff5		Microsoft Excel 5.0 Worksheet
532!:mime	application/vnd.ms-excel
5330	string/b	\x09\x04\x06\x00\x00\x00\x10\x00	Microsoft Excel Worksheet
534!:mime	application/vnd.ms-excel
535#
5360	belong	0x00001a00	Lotus 1-2-3
537!:mime	application/x-123
538>4	belong	0x00100400	wk3 document data
539>4	belong	0x02100400	wk4 document data
540>4	belong	0x07800100	fm3 or fmb document data
541>4	belong	0x07800000	fm3 or fmb document data
542#
5430	belong	0x00000200	Lotus 1-2-3
544!:mime	application/x-123
545>4	belong	0x06040600	wk1 document data
546>4	belong	0x06800200	fmt document data
5470	string/b		WordPro\0	Lotus WordPro
548!:mime	application/vnd.lotus-wordpro
5490	string/b		WordPro\r\373	Lotus WordPro
550!:mime	application/vnd.lotus-wordpro
551
552
553# Summary: Script used by InstallShield to uninstall applications
554# Extension: .isu
555# Submitted by: unknown
556# Modified by (1): Abel Cheung <abelcheung@gmail.com> (replace useless entry)
5570		string		\x71\xa8\x00\x00\x01\x02
558>12		string		Stirling\ Technologies,		InstallShield Uninstall Script
559
560# Winamp .avs
561#0	string	Nullsoft\ AVS\ Preset\ \060\056\061\032 A plug in for Winamp ms-windows Freeware media player
5620	string/b	Nullsoft\ AVS\ Preset\ 	Winamp plug in
563
564# Windows Metafont .WMF
5650	string/b	\327\315\306\232	ms-windows metafont .wmf
5660	string/b	\002\000\011\000	ms-windows metafont .wmf
5670	string/b	\001\000\011\000	ms-windows metafont .wmf
568
569#tz3 files whatever that is (MS Works files)
5700	string/b	\003\001\001\004\070\001\000\000	tz3 ms-works file
5710	string/b	\003\002\001\004\070\001\000\000	tz3 ms-works file
5720	string/b	\003\003\001\004\070\001\000\000	tz3 ms-works file
573
574# PGP sig files .sig
575#0 string \211\000\077\003\005\000\063\237\127 065 to  \027\266\151\064\005\045\101\233\021\002 PGP sig
5760 string \211\000\077\003\005\000\063\237\127\065\027\266\151\064\005\045\101\233\021\002 PGP sig
5770 string \211\000\077\003\005\000\063\237\127\066\027\266\151\064\005\045\101\233\021\002 PGP sig
5780 string \211\000\077\003\005\000\063\237\127\067\027\266\151\064\005\045\101\233\021\002 PGP sig
5790 string \211\000\077\003\005\000\063\237\127\070\027\266\151\064\005\045\101\233\021\002 PGP sig
5800 string \211\000\077\003\005\000\063\237\127\071\027\266\151\064\005\045\101\233\021\002 PGP sig
5810 string \211\000\225\003\005\000\062\122\207\304\100\345\042 PGP sig
582
583# windows zips files .dmf
5840	string/b	MDIF\032\000\010\000\000\000\372\046\100\175\001\000\001\036\001\000 MS Windows special zipped file
585
586
587#ico files
5880	string/b	\102\101\050\000\000\000\056\000\000\000\000\000\000\000	Icon for MS Windows
589
590# Windows icons
5910   name    ico-dir
592# not entirely accurate, the number of icons is part of the header
593>0  byte    1   - 1 icon
594>0  ubyte   >1  - %d icons
595>2  byte    0   \b, 256x
596>2  byte    !0  \b, %dx
597>3  byte    0   \b256
598>3  byte    !0  \b%d
599>4  ubyte   !0  \b, %d colors
600
6010   belong  0x00000100
602>9  byte    0
603>>0 byte    x           MS Windows icon resource
604!:mime	image/x-icon
605>>4 use     ico-dir
606>9  ubyte   0xff
607>>0 byte    x           MS Windows icon resource
608!:mime	image/x-icon
609>>4 use     ico-dir
610
611# Windows non-animated cursors
6120   name    cur-dir
613# not entirely accurate, the number of icons is part of the header
614>0  byte        1   - 1 icon
615>0  ubyte       >1  - %d icons
616>2  byte        0   \b, 256x
617>2  byte        !0  \b, %dx
618>3  byte        0   \b256
619>3  byte        !0  \b%d
620>6  uleshort    x   \b, hotspot @%dx
621>8  uleshort    x   \b%d
622
6230   belong  0x00000200
624>9  byte    0
625>>0 byte    x           MS Windows cursor resource
626!:mime image/x-cur
627>>4 use     cur-dir
628>9  ubyte   0xff
629>>0 byte    x           MS Windows cursor resource
630!:mime image/x-cur
631>>4 use     cur-dir
632
633# .chr files
6340	string/b	PK\010\010BGI	Borland font 
635>4	string	>\0	%s
636# then there is a copyright notice
637
638
639# .bgi files
6400	string/b	pk\010\010BGI	Borland device 
641>4	string	>\0	%s
642# then there is a copyright notice
643
644
645# Windows Recycle Bin record file (named INFO2)
646# By Abel Cheung (abelcheung AT gmail dot com)
647# Version 4 always has 280 bytes (0x118) per record, version 5 has 800 bytes
648# Since Vista uses another structure, INFO2 structure probably won't change
649# anymore. Detailed analysis in:
650# http://www.cybersecurityinstitute.biz/downloads/INFO2.pdf
6510	lelong		0x00000004
652>12	lelong		0x00000118	Windows Recycle Bin INFO2 file (Win98 or below)
653
6540	lelong		0x00000005
655>12	lelong		0x00000320	Windows Recycle Bin INFO2 file (Win2k - WinXP)
656
657
658##### put in Either Magic/font or Magic/news
659# Acroread or something	 files wrongly identified as G3	 .pfm
660# these have the form \000 \001 any? \002 \000 \000
661# or \000 \001 any? \022 \000 \000
6620	belong&0xffff00ff	0x00010012	PFM data
663>4	string			\000\000
664>6	string			>\060		- %s
665
6660	belong&0xffff00ff	0x00010002	PFM data
667>4	string			\000\000
668>6	string			>\060		- %s
669#0	string	\000\001 pfm?
670#>3	string	\022\000\000Copyright\	yes
671#>3	string	\002\000\000Copyright\	yes
672#>3	string	>\0	oops, not a font file. Cancel that.
673#it clashes with ttf files so put it lower down.
674
675# From Doug Lee via a FreeBSD pr
6769	string		GERBILDOC	First Choice document
6779	string		GERBILDB	First Choice database
6789	string		GERBILCLIP	First Choice database
6790	string		GERBIL		First Choice device file
6809	string		RABBITGRAPH	RabbitGraph file
6810	string		DCU1		Borland Delphi .DCU file
6820	string		=!<spell>	MKS Spell hash list (old format)
6830	string		=!<spell2>	MKS Spell hash list
684# Too simple - MPi
685#0	string		AH		Halo(TM) bitmapped font file
6860	lelong		0x08086b70	TurboC BGI file
6870	lelong		0x08084b50	TurboC Font file
688
689# Debian#712046: The magic below identifies "Delphi compiled form data". 
690# An additional source of information is available at:
691# http://www.woodmann.com/fravia/dafix_t1.htm
6920	string		TPF0
693>4	pstring		>\0		Delphi compiled form '%s'
694
695# tests for DBase files moved, updated and merged to database
696
6970	string		PMCC		Windows 3.x .GRP file
6981	string		RDC-meg		MegaDots 
699>8	byte		>0x2F		version %c
700>9	byte		>0x2F		\b.%c file
7010	lelong		0x4C
702>4	lelong		0x00021401	Windows shortcut file
703
704# .PIF files added by Joerg Jenderek from http://smsoft.ru/en/pifdoc.htm
705# only for windows versions equal or greater 3.0
7060x171	string	MICROSOFT\ PIFEX\0	Windows Program Information File
707!:mime	application/x-dosexec
708#>2	string	 	>\0		\b, Title:%.30s
709>0x24	string		>\0		\b for %.63s
710>0x65	string		>\0		\b, directory=%.64s
711>0xA5	string		>\0		\b, parameters=%.64s
712#>0x181	leshort	x	\b, offset %x
713#>0x183	leshort	x	\b, offsetdata %x
714#>0x185	leshort	x	\b, section length %x
715>0x187	search/0xB55	WINDOWS\ VMM\ 4.0\0	
716>>&0x5e		ubyte	>0			
717>>>&-1		string	<PIFMGR.DLL		\b, icon=%s
718#>>>&-1		string	PIFMGR.DLL		\b, icon=%s
719>>>&-1		string	>PIFMGR.DLL		\b, icon=%s
720>>&0xF0		ubyte	>0			
721>>>&-1		string	<Terminal		\b, font=%.32s
722#>>>&-1		string	=Terminal		\b, font=%.32s
723>>>&-1		string	>Terminal		\b, font=%.32s
724>>&0x110	ubyte	>0			
725>>>&-1		string	<Lucida\ Console	\b, TrueTypeFont=%.32s
726#>>>&-1		string	=Lucida\ Console	\b, TrueTypeFont=%.32s
727>>>&-1		string	>Lucida\ Console	\b, TrueTypeFont=%.32s
728#>0x187	search/0xB55	WINDOWS\ 286\ 3.0\0	\b, Windows 3.X standard mode-style
729#>0x187	search/0xB55	WINDOWS\ 386\ 3.0\0	\b, Windows 3.X enhanced mode-style
730>0x187	search/0xB55	WINDOWS\ NT\ \ 3.1\0	\b, Windows NT-style
731#>0x187	search/0xB55	WINDOWS\ NT\ \ 4.0\0	\b, Windows NT-style
732>0x187	search/0xB55	CONFIG\ \ SYS\ 4.0\0	\b +CONFIG.SYS
733#>>&06		string	x			\b:%s
734>0x187	search/0xB55	AUTOEXECBAT\ 4.0\0	\b +AUTOEXEC.BAT
735#>>&06		string	x			\b:%s
736
737# DOS EPS Binary File Header
738# From: Ed Sznyter <ews@Black.Market.NET>
7390	belong		0xC5D0D3C6	DOS EPS Binary File
740>4	long		>0		Postscript starts at byte %d
741>>8	long		>0		length %d
742>>>12	long		>0		Metafile starts at byte %d
743>>>>16	long		>0		length %d
744>>>20	long		>0		TIFF starts at byte %d
745>>>>24	long		>0		length %d
746
747# TNEF magic From "Joomy" <joomy@se-ed.net>
748# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF); the signature 0x223e9f78 needs a 32-bit little-endian test
7490	lelong		0x223e9f78	TNEF
750!:mime	application/vnd.ms-tnef
751
752# Norton Guide (.NG , .HLP) files added by Joerg Jenderek from source NG2HTML.C
753# of http://www.davep.org/norton-guides/ng2h-105.tgz
754# http://en.wikipedia.org/wiki/Norton_Guides
# Two-step match: the 4-byte prefix "NG\0\001" selects the entry silently,
# then the little-endian 32-bit value at offset 2 must equal 0x100 before
# anything is printed.
7550	string		NG\0\001	
756# only value 0x100 found at offset 2
757>2	ulelong		0x00000100	Norton Guide
758# Title[40]
759>>8	string		>\0		"%-.40s"
760#>>6	uleshort	x		\b, MenuCount=%u
761# szCredits[5][66]
# Only the strings at offsets 48 and 114 are printed, each limited to 66
# characters and only when non-empty.
762>>48	string		>\0		\b, %-.66s
763>>114	string		>\0		%-.66s
764
765# 4DOS help (.HLP) files added by Joerg Jenderek from source TPHELP.PAS 
766# of http://www.4dos.info/
767# pointer,HelpID[8]=4DHnnnmm
# The little-endian 32-bit value 0x48443408 corresponds to the bytes
# 0x08 '4' 'D' 'H' at offset 0; the four characters starting at offset 4
# are printed as the version.
7680	ulelong	0x48443408		4DOS help file
769>4	string	x			\b, version %-4.4s
770
771# old binary Microsoft (.HLP) files added by Joerg Jenderek from http://file-extension.net/seeker/file_extension_hlp
# The first eight bytes are compared as one little-endian 64-bit value,
# i.e. the byte sequence 4C 4E 02 00 00 00 3A 00.
7720	ulequad	0x3a000000024e4c	MS Advisor help file
773
774# HtmlHelp files (.chm)
# Matches "ITSF" followed by the eight bytes 03 00 00 00 60 00 00 00 at
# offset 0; the /b flag marks the test as binary.
7750	string/b	ITSF\003\000\000\000\x60\000\000\000	MS Windows HtmlHelp Data
776
777# GFA-BASIC (Wolfram Kleff)
# The identifying string is tested at offset 2, not at the start of the file.
7782	string/b	GFA-BASIC3	GFA-BASIC 3 data
779
780#------------------------------------------------------------------------------
781# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
782# Microsoft Cabinet files
# Signature is "MSCF" followed by four NUL bytes.  The lelong at offset 8 is
# printed as the byte count; the leshort at offset 28 is printed as the file
# count, with a separate test so that a count of 1 reads "1 file".
7830	string/b	MSCF\0\0\0\0	Microsoft Cabinet archive data
784!:mime application/vnd.ms-cab-compressed
785>8	lelong		x		\b, %u bytes
786>28	leshort		1		\b, 1 file
787>28	leshort		>1		\b, %u files
788
789# InstallShield Cabinet files
# The high nibble of the byte at offset 5 chooses the version text: 0x60
# prints "version 6,", anything else prints "version 4/5,".
# The file count is read through an indirect offset: the little-endian long
# at offset 12, plus 40, gives the position of the lelong that is printed.
7900	string/b	ISc(		InstallShield Cabinet archive data
791>5	byte&0xf0	=0x60		version 6,
792>5	byte&0xf0	!0x60		version 4/5,
793>(12.l+40)	lelong	x		%u files
794
795# Windows CE package files
# Signature is "MSCE" followed by four NUL bytes.  The lelong at offset 20
# selects the architecture text from the table below; values not listed
# print nothing.  The leshorts at offsets 52 and 56 give the file and
# registry-entry counts, each with a separate singular form for 1.
7960	string/b	MSCE\0\0\0\0	Microsoft WinCE install header
797>20	lelong		0		\b, architecture-independent
798>20	lelong		103		\b, Hitachi SH3
799>20	lelong		104		\b, Hitachi SH4
800>20	lelong		0xA11		\b, StrongARM
801>20	lelong		4000		\b, MIPS R4000
802>20	lelong		10003		\b, Hitachi SH3
803>20	lelong		10004		\b, Hitachi SH3E
804>20	lelong		10005		\b, Hitachi SH4
805>20	lelong		70001		\b, ARM 7TDMI
806>52	leshort		1		\b, 1 file
807>52	leshort		>1		\b, %u files
808>56	leshort		1		\b, 1 registry entry
809>56	leshort		>1		\b, %u registry entries
810
811
812# Windows Enhanced Metafile (EMF)
813# See msdn.microsoft.com/archive/en-us/dnargdi/html/msdn_enhmeta.asp 
814# for further information.
# The leading test (little-endian 32-bit value 1 at offset 0) prints nothing
# by itself; the description is emitted only when the string " EMF" is also
# found at offset 40.  The ulelong at offset 44 is then shown in hex.
8150	ulelong 1
816>40	string	\ EMF		Windows Enhanced Metafile (EMF) image data
817>>44	ulelong x		version 0x%x
818
819# from http://filext.com by Derek M Jones <derek@knosof.co.uk>
820# False positive with PPT (also currently this string is too long)
821#0	string/b	\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06	Microsoft Installer
# The active test matches only the 8-byte signature D0 CF 11 E0 A1 B1 1A E1
# (written in octal), i.e. the first eight bytes of the disabled pattern above.
# "bjbj" or "jbjb" at offset 546 additionally labels the file as a Word
# document and sets the msword MIME type.
8220	string/b	\320\317\021\340\241\261\032\341	Microsoft Office Document
823#>48	byte	0x1B					Excel Document
824#!:mime application/vnd.ms-excel
825>546	string	bjbj			Microsoft Word Document
826!:mime	application/msword
827>546	string	jbjb			Microsoft Word Document
828!:mime	application/msword
829
# Files starting with the three bytes 94 A6 2E (written in octal below).
8300	string/b	\224\246\056		Microsoft Word Document
831!:mime	application/msword
832
# Matches "Root Entry" with a NUL after every character except the last,
# at offset 512.
# NOTE(review): the test checks only this string, yet the result is labelled
# as a Word document -- confirm whether other file types share this layout.
833512	string	R\0o\0o\0t\0\ \0E\0n\0t\0r\0y	Microsoft Word Document
834!:mime	application/msword
835
836# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
837# Magic type for Dell's BIOS .hdr files
838# Dell's .hdr
# "$RBU" at offset 0 selects the entry without printing anything.
# "Dell" at offset 23 prints the vendor line.  The byte at offset 5 decides
# how the version is shown: value 2 prints the three bytes at offsets 48-50
# as dotted decimal numbers, a value below 2 prints up to three characters
# of the string at offset 48.
8390	string/b $RBU
840>23	string Dell			%s system BIOS
841>5	byte   2
842>>48	byte   x			version %d.
843>>49	byte   x			\b%d.
844>>50	byte   x			\b%d
845>5	byte   <2
846>>48	string x			version %.3s
847
848# Type: Microsoft DirectDraw Surface
849# URL:	http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/DDSFileReference/ddsfileformat.asp
850# From: Morten Hustveit <morten@debian.org>
# Signature is "DDS " followed by the bytes 7C 00 00 00.  The lelong at
# offset 16 is printed before the one at offset 12, giving "<a> x <b>,"
# (presumably width x height -- confirm against the format reference),
# followed by up to four characters of the string at offset 84.
8510	string/b	DDS\040\174\000\000\000 Microsoft DirectDraw Surface (DDS),
852>16	lelong	>0			%d x
853>12	lelong	>0			%d,
854>84	string	x			%.4s
855
856# Type: Microsoft Document Imaging Format (.mdi)
857# URL:	http://en.wikipedia.org/wiki/Microsoft_Document_Imaging_Format
858# From: Daniele Sempione <scrows@oziosi.org>
# The signature is the byte pair 0x45 0x50 ("EP").  It is tested as an
# explicit little-endian short so that the result does not depend on the byte
# order of the host running file(1); a host-order short would match the
# bytes "PE" instead on big-endian machines.
8590	leshort	0x5045			Microsoft Document Imaging Format
860
861# MS eBook format (.lit)
# "ITOLITLS" at offset 0; the little-endian long at offset 8 is printed as
# the version number.
8620	string/b	ITOLITLS		Microsoft Reader eBook Data
863>8	lelong	x			\b, version %u
864!:mime					application/x-ms-reader
865
866# Windows CE Binary Image Data Format
867# From: Dr. Jesus <j@hug.gs>
# The signature is the seven bytes "B000FF" plus a newline at offset 0.
8680	string/b	B000FF\n	Windows Embedded CE binary image
869
870# Windows Imaging (WIM) Image
# The signature is "MSWIM" followed by three NUL bytes at offset 0.
8710	string/b	MSWIM\000\000\000	Windows imaging (WIM) image
872
873# The second byte of these signatures is a file version; I don't know what, 
874# if anything, produced files with version numbers 0-2.
875# From: John Elliott <johne@seasip.demon.co.uk>
# All four signatures start with 0xFC.  In the entries below the second byte
# (03 or 04) selects the "v1.11" or "v1.29+" label, and the third byte
# (00 or 01) selects plain or protected program data.
8760	string	\xfc\x03\x00	Mallard BASIC program data (v1.11)
8770	string	\xfc\x04\x00	Mallard BASIC program data (v1.29+)
8780	string	\xfc\x03\x01	Mallard BASIC protected program data (v1.11)
8790	string	\xfc\x04\x01	Mallard BASIC protected program data (v1.29+)
880
# Mallard BASIC Jetsam files: plain ASCII signatures at offset 0,
# "MIOPEN" for data files and "Jetsam0" for index files.
8810	string	MIOPEN		Mallard BASIC Jetsam data
8820	string	Jetsam0		Mallard BASIC Jetsam index data
883
884